import pandas as pd
# Extract the SNPs lying in a window around one position on a single
# chromosome: a trimmed map table and the matching genotype columns are
# written out as tab-separated files.
# NOTE(review): the output looks intended as Haploview input -- confirm.

MAP_XLSX = 'C:\\Users\\windows10\\Desktop\\Python练习\\文本流\\chrom 1\\chrom1_map.xlsx'
PED_XLSX = 'C:\\Users\\windows10\\Desktop\\Python练习\\文本流\\chrom 1\\chrom1_snp.xlsx'
MAP_OUT = "C:\\Users\\windows10\\Desktop\\Python练习\\文本流\\chrom 1\\chrom1_test_map.csv"
SNP_OUT = "C:\\Users\\windows10\\Desktop\\Python练习\\文本流\\chrom 1\\chrom1_test_snp.csv"

SNP_POSITION = 315320300  # position of the SNP of interest
HALF_WINDOW = 500000      # rows within +/- this distance of the position are kept


def select_window(map_df, pos, half_window=HALF_WINDOW, pos_col='4'):
    """Return the rows of *map_df* located strictly inside ``pos +/- half_window``.

    Args:
        map_df: Map table; ``map_df[pos_col]`` must be numerically comparable.
        pos: Centre of the window.
        half_window: Distance kept on each side of ``pos``. Both bounds are
            exclusive, as in the original two-step filter.
        pos_col: Label of the position column (``'4'`` in the map workbook).

    Returns:
        The matching rows, with their original index and all columns.

    Raises:
        KeyError: If ``pos_col`` is not a column of ``map_df``.
    """
    positions = map_df[pos_col]
    in_window = (positions > pos - half_window) & (positions < pos + half_window)
    return map_df[in_window]


def extract_snp_columns(ped_df, snp_names, n_leading=6, dup_suffix='.1'):
    """Keep the leading columns of *ped_df* plus both columns of each SNP.

    Every SNP name heads two columns in the workbook. pandas renames the
    second occurrence of a duplicated header to ``<name>.1`` on load, so both
    ``name`` and ``name + dup_suffix`` are picked, in that order.

    Args:
        ped_df: Genotype table; its first ``n_leading`` columns are always kept.
            Presumably these are the per-sample fields of a PED file -- verify.
        snp_names: SNP names to extract, in output order.
        n_leading: Number of leading columns copied through unchanged.
        dup_suffix: Suffix of the second column of each SNP.

    Returns:
        A DataFrame holding the leading columns followed by the selected pairs.

    Raises:
        KeyError: If a requested column is absent from the genotype part.
    """
    leading = ped_df.iloc[:, :n_leading]
    genotypes = ped_df.iloc[:, n_leading:]

    wanted = []
    for name in snp_names:
        wanted.append(name)
        wanted.append(f"{name}{dup_suffix}")

    # One selection and one concat instead of two concats per SNP, which
    # re-copied the growing frame on every iteration.
    return pd.concat([leading, genotypes[wanted]], axis=1)


def main():
    """Run the extraction for SNP_POSITION and write both output files."""
    map_df = pd.read_excel(MAP_XLSX)
    print(type(map_df))

    # Take the SNP position and extract the SNP locus information around it.
    window = select_window(map_df, SNP_POSITION, HALF_WINDOW)

    # Output the map file: columns '1' and '3' are dropped, no header or index.
    trimmed_map = window.drop(['1', '3'], axis=1)
    trimmed_map.to_csv(MAP_OUT, sep='\t', index=False, header=False)

    # Collect the SNP names (second column of the map table) as a list.
    snp_names = window.iloc[:, 1].tolist()

    # Extract the target base columns from the ped workbook by SNP name.
    ped_df = pd.read_excel(PED_XLSX)
    selected = extract_snp_columns(ped_df, snp_names)

    # Output the snp file.
    selected.to_csv(SNP_OUT, sep='\t', index=False, header=False)


if __name__ == "__main__":
    main()
Python pandas 单条 染色体 位置 区间 SNP 数据 提取 haploview
最新推荐文章于 2022-11-09 15:44:57 发布