# 1. 引入第三方库 (import third-party libraries)
from Bio import SeqIO
import matplotlib.pyplot as plt
# 2. 写函数 (define the function)
def sequence(file_name):
    """Print the metadata of an ABI trace file and plot its four data channels.

    The file is accepted only when its name ends in ``ab1`` and its first
    four bytes are ``ABIF``. For each record produced by
    ``SeqIO.parse(file_name, "abi")`` the function prints a set of
    annotation fields, the phred quality list and the ``DATA9`` channel,
    then shows a matplotlib figure with the ``DATA9``-``DATA12`` channels.

    Args:
        file_name: Path of the trace file, as a string.

    Returns:
        The string ``"wrong file format"`` when the name or the leading
        bytes do not match; otherwise ``None``.

    Raises:
        OSError: If the file cannot be opened (the file is opened before
            the name is checked, so this also applies to non-``ab1`` names).
        KeyError: If an expected annotation or raw tag is missing.
    """
    # Only the 4-byte signature is needed: read just that, in binary mode so
    # no text decoding is attempted, and let ``with`` close the handle.
    with open(file_name, "rb") as handle:
        magic = handle.read(4)
    if not file_name.endswith("ab1") or magic != b"ABIF":
        return "wrong file format"

    # NOTE(review): an ABI file is expected to hold a single record, so the
    # loop body normally runs once — confirm against the Biopython docs.
    # Keeping all work inside the loop means an empty parse result simply
    # prints nothing instead of failing on undefined names.
    for record in SeqIO.parse(file_name, "abi"):
        anno = record.annotations
        abif_raw = anno["abif_raw"]

        # Insertion order is the print order below.
        info_dict = {
            "seq": record.seq,
            "name": record.id,
            "date": anno["run_start"] + " to " + anno["run_finish"],
            "spac": "{:.2f}".format(abif_raw["SPAC1"]),
            "dyep": abif_raw["PDMF2"].decode('utf-8'),
            "mach": abif_raw["MCHN1"].decode('utf-8'),
            "modl": anno["machine_model"].decode('utf-8'),
            "bcal": abif_raw["SPAC2"].decode('utf-8'),
            "ver1": abif_raw["SVER1"].decode('utf-8'),
            "ver2": abif_raw["SVER2"].decode('utf-8'),
        }

        # NOTE(review): the G/A/T/C labelling of DATA9..DATA12 follows the
        # original variable names; presumably the file's base order is
        # "GATC" — confirm for files produced with a different order.
        data_g = list(abif_raw["DATA9"])
        data_a = list(abif_raw["DATA10"])
        data_t = list(abif_raw["DATA11"])
        data_c = list(abif_raw["DATA12"])
        qs = record.letter_annotations["phred_quality"]

        for key, value in info_dict.items():
            # An f-string formats str and non-str values (e.g. the sequence
            # object) alike, without relying on reverse concatenation.
            print(f"{key} : {value}")
        print("qs:")
        print(qs)
        print("g-data:")
        print(data_g)

        plt.figure()
        ticks = range(len(data_g))
        plt.plot(ticks, data_a, c='green')
        plt.plot(ticks, data_c, c='purple')
        plt.plot(ticks, data_g, c='gray')
        plt.plot(ticks, data_t, c='red')
        plt.show()
# 3. 导入文件 (supply the input file: replace '文件' below with the path to an .ab1 file)
if __name__ == "__main__":
    # '文件' is a placeholder path; replace it with the trace file to inspect.
    outcome = sequence('文件')
    # sequence() reports a rejected file by returning a message rather than
    # raising, so print it instead of discarding it silently.
    if outcome is not None:
        print(outcome)
# 4. 启动函数 (run this script to call sequence() on that file)