"""Compute a pairwise sequence-similarity matrix from a FASTA-style file.

The input file is split on '>' into records. The first whitespace-delimited
token of each record's first line is used as the header; the remaining lines
are concatenated to form the sequence. Similarity between two sequences is
the fraction of identical characters at the same position, normalized by the
length of the shorter sequence (no alignment is performed).
"""

import numpy as np

file_path = '/mnt/data/similar_seq.txt'


def _parse_fasta(text):
    """Parse '>'-delimited records from *text* into a {header: sequence} dict.

    Text before the first '>' is ignored. Records that contain no header
    token (e.g. a stray '>') are skipped instead of raising IndexError.
    If the same header occurs more than once, the last record wins.
    """
    seq_dict = {}
    # Element 0 of the split is whatever precedes the first '>', not a record.
    for record in text.split('>')[1:]:
        lines = record.strip().split('\n')
        header_fields = lines[0].split()
        if not header_fields:
            continue
        # Strip each line so trailing whitespace never becomes part of the
        # sequence and distorts the position-wise comparison.
        seq_dict[header_fields[0]] = ''.join(line.strip() for line in lines[1:])
    return seq_dict


def calculate_similarity(seq1, seq2):
    """Return the fraction of positions at which *seq1* and *seq2* agree.

    Characters are compared position by position over the overlapping prefix
    (the length of the shorter sequence), and the match count is divided by
    that length. If either sequence is empty there is nothing to compare and
    0.0 is returned rather than dividing by zero.
    """
    shorter = min(len(seq1), len(seq2))
    if shorter == 0:
        return 0.0
    matches = sum(a == b for a, b in zip(seq1, seq2))
    return matches / shorter


def _build_similarity_matrix(sequences):
    """Return a square NumPy array of pairwise similarities for *sequences*."""
    num_sequences = len(sequences)
    matrix = np.zeros((num_sequences, num_sequences))
    for i in range(num_sequences):
        # calculate_similarity is symmetric in its arguments, so compute the
        # upper triangle (including the diagonal) once and mirror it.
        for j in range(i, num_sequences):
            score = calculate_similarity(sequences[i], sequences[j])
            matrix[i, j] = score
            matrix[j, i] = score
    return matrix


if __name__ == "__main__":
    # Read the sequences from the uploaded file.
    with open(file_path, 'r', encoding='utf-8') as file:
        seq_dict = _parse_fasta(file.read())

    # Extract headers and sequences in matching order.
    headers = list(seq_dict.keys())
    sequences = list(seq_dict.values())

    # Calculate the similarity matrix.
    similarity_matrix = _build_similarity_matrix(sequences)

    # A bare `similarity_matrix, headers` expression only echoes in a
    # REPL/notebook; print so the result is also visible when run as a script.
    print(headers)
    print(similarity_matrix)