1.原始数据,finalreport
读取finalreport
import pandas as pd
# Load the FinalReport table: tab-separated, with the column names taken from
# 0-based row 9 (header=9); pandas skips the rows above it.
df_finalreport = pd.read_csv(r".\FinalReport.txt", sep="\t", header=9)

# Build one "allele1 allele2" genotype string per row from the Top-strand
# columns.  For Forward-strand output, use the "Allele1 - Forward" and
# "Allele2 - Forward" columns here instead.
first_allele = df_finalreport["Allele1 - Top"]
second_allele = df_finalreport["Allele2 - Top"]
df_finalreport["allele"] = first_allele + " " + second_allele

# Collapse to one row per sample: all of a sample's genotype strings are joined
# with single spaces, in the order they appear in the report.
alleles_by_sample = df_finalreport.groupby("Sample ID")["allele"]
df_new = alleles_by_sample.agg(" ".join).reset_index()

# FinalReport marks a missing allele with "-"; the PED output uses "0" instead.
df_new["allele"] = df_new["allele"].str.replace("-", "0", regex=False)
2.输出文件(已有其他格式的ped文件时:沿用该文件的前6列,只替换基因型)
# Case: a PED file in another allele coding (e.g. Forward) already exists.
# Its six leading columns are reused and its genotype columns are replaced by
# the per-sample allele strings held in df_new["allele"].
#
# The PED is read by path so that pandas applies the gb18030 encoding and
# closes the file itself; an already-open text handle is never closed here and
# makes the `encoding` argument ineffective.  Only the first six columns are
# needed, so the (potentially very wide) genotype columns are not loaded.
snp_map_file = pd.read_csv(
    r".\修改前.ped",
    sep=r"\s+",
    encoding="gb18030",
    header=None,
    usecols=list(range(6)),
    low_memory=False,
)

if len(snp_map_file) != len(df_new):
    # Joining tables of unequal length would pad the shorter one with NaN and
    # write a malformed PED file, so stop instead.
    raise ValueError(
        f"sample count mismatch: PED file has {len(snp_map_file)} rows, "
        f"FinalReport has {len(df_new)} samples"
    )

# df_new comes out of a groupby, i.e. sorted by Sample ID, which need not be
# the row order of the PED file.  When PED column 1 carries exactly the same
# set of unique IDs as df_new["Sample ID"], reorder the allele strings to the
# PED row order so each genotype stays with its own individual.
# NOTE(review): assumes column 1 of the existing PED is the individual ID
# (standard PED layout) - confirm for this data set.
ped_ids = snp_map_file.iloc[:, 1].astype(str)
sample_ids = df_new["Sample ID"].astype(str)
ids_match = (
    ped_ids.is_unique
    and sample_ids.is_unique
    and set(ped_ids) == set(sample_ids)
)
if ids_match:
    allele_by_id = pd.Series(
        df_new["allele"].to_numpy(), index=sample_ids.to_numpy()
    )
    alleles = pd.Series(
        allele_by_id.reindex(ped_ids.to_numpy()).to_numpy(), name="allele"
    )
else:
    # IDs cannot be matched (e.g. renamed in the PED): keep the original
    # row-by-row pairing.
    alleles = df_new["allele"]

fh_914out = pd.concat([snp_map_file, alleles], axis=1).reset_index(drop=True)
# Tab between fields, no header row and no index column.
fh_914out.to_csv(r".\修改后.ped", sep="\t", index=False, header=False)
2.输出文件(没有其他格式的ped文件时:前6列由脚本自行生成)
# Case: no PED file in another coding (e.g. Forward) is available, so the six
# leading columns are generated here before writing.

# Expose the 0-based row position as a column named "index" and shift it so
# numbering starts at 1; it becomes the first output column.
df_new = df_new.reset_index()
df_new["index"] += 1

# sire, dam, sex and phenotype are all written as the constant 0.
df_new = df_new.assign(sire=0, dam=0, sex=0, phenotype=0)

# Column order of the output file; the allele string goes last.
ped_columns = ["index", "Sample ID", "sire", "dam", "sex", "phenotype", "allele"]
df_new[ped_columns].to_csv(r".\修改后.ped", sep="\t", index=None, header=None)