用python脚本将DNA序列的.fa文件格式转换为.npy

 #.fa 文件转换为 .npy之后作为DL学习的原始数据

# Build the .npy train/valid/test sets from the .fa files
import os 
import numpy as np

path = os.getcwd()
#################get the ENHANCER proper .fa file#################
# Load every line of the enhancer .fa file found in the current working
# directory. The `with` block closes the handle even if reading fails.
with open(os.path.join(path, 'C_10K_GM12878.csv_enhancer.fa'), 'r') as f_enh: #-***- enhancer.fa -***- #
	# Remove the trailing newline. The earlier strip("/n") stripped the
	# characters '/' and 'n' instead, so the "\n" stayed on every entry.
	enh_list = [line.rstrip("\n") for line in f_enh]
# Keep only the first 1610 entries so the list length is a multiple of 10.
# NOTE(review): the original remark mentions 16106 - 6 (= 16100), which does
# not match the 1610 used here -- confirm the intended size.
enh_list = enh_list[0:1610]
def Data_Set_enh(tr_enh_num,va_enh_num,te_enh_num,seqs=None): #0.8/0.1/0.1
	"""Split the enhancer entries into train / validation / test lists.

	Args:
		tr_enh_num: Fraction of the entries used for training (e.g. 0.8).
		va_enh_num: Fraction of the entries used for validation (e.g. 0.1).
		te_enh_num: Fraction intended for testing (e.g. 0.1). Kept for
			interface compatibility; the test split is simply everything
			left after the train and validation slices.
		seqs: Optional list to split. Defaults to the module-level
			``enh_list`` when omitted.

	Returns:
		A tuple ``(train, validation, test)`` of consecutive,
		non-overlapping slices of the input list.
	"""
	if seqs is None:
		seqs = enh_list
	total = len(seqs)
	# fraction * length is a float; slice bounds must be integers, so round
	# to the nearest whole count (also absorbs float noise such as 7.000000001).
	enh_tr_num = int(round(tr_enh_num * total))
	enh_va_num = int(round(va_enh_num * total))
	va_end = enh_tr_num + enh_va_num

	enh_tr = seqs[0:enh_tr_num]
	enh_va = seqs[enh_tr_num:va_end]
	enh_te = seqs[va_end:]
	return enh_tr,enh_va,enh_te
	
################get the PROMOTER proper .fa file################
# Load every line of the promoter .fa file found in the working directory
# held in `path`. The `with` block closes the handle even if reading fails.
with open(os.path.join(path, 'C_10K_GM12878.csv_promoter.fa'), 'r') as f_pro: #-***- promoter.fa -***- #
	# Remove the trailing newline. The earlier strip("/n") stripped the
	# characters '/' and 'n' instead, so the "\n" stayed on every entry.
	pro_list = [line.rstrip("\n") for line in f_pro]
# Keep only the first 1610 entries so the list length is a multiple of 10.
# NOTE(review): the original remark mentions 16106 - 6 (= 16100), which does
# not match the 1610 used here -- confirm the intended size.
pro_list = pro_list[0:1610]
def Data_Set_pro(tr_pro_num,va_pro_num,te_pro_n
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值