提取 VCF 文件中的测序深度和基因型信息(示例如下),并将结果保存至 Excel 文件中。
程序代码
import os
import sys
import openpyxl
import pandas as pd
def get_depth_genetpye(vcf_path, output_path):
    """Export per-site depth and genotype of a VCF's last sample to Excel.

    The last column of the VCF is treated as the sample column. Each of its
    values is split on ':'; the first sub-field is written as the genotype
    and the second and third sub-fields, re-joined with ':', as the depth.
    Rows whose sample value is missing or has fewer than three sub-fields
    are written as 'ERROR' in both output columns.

    Args:
        vcf_path: Path to a tab-separated VCF file. A path ending in '.gz'
            is read as gzip-compressed text.
        output_path: Path of the Excel workbook to write. The sheet has the
            columns 'Chr', 'Start', 'Depth' and 'Genetype' (header spelling
            kept as-is for existing consumers).

    Raises:
        KeyError: If the table has no '#CHROM' or 'POS' column.
    """
    import gzip

    print("read raw vcf...")

    # Count the leading '##' meta lines instead of assuming there are
    # exactly 50 of them, so the '#CHROM' line is always used as the header.
    opener = gzip.open if str(vcf_path).endswith('.gz') else open
    meta_lines = 0
    with opener(vcf_path, 'rt', encoding='utf-8') as handle:
        for line in handle:
            if not line.startswith('##'):
                break
            meta_lines += 1

    df = pd.read_csv(vcf_path, sep='\t', skiprows=meta_lines)
    sample = df.columns[-1]

    # Build the output columns in one pass rather than assigning cell by
    # cell onto a slice of the frame.
    depths = []
    genotypes = []
    for info in df[sample]:
        # A missing sample value is read as NaN (a float), not a string.
        fields = info.split(':') if isinstance(info, str) else []
        if len(fields) >= 3:
            genotypes.append(fields[0])
            depths.append(':'.join(fields[1:3]))
        else:
            genotypes.append('ERROR')
            depths.append('ERROR')

    result = pd.DataFrame({
        'Chr': df['#CHROM'],
        'Start': df['POS'],
        'Depth': depths,
        'Genetype': genotypes,
    })
    result.to_excel(output_path, index=False)
结果文件