获取 KEGG 通路中的基因名称(通路的 Entry 号为 hsa04010)
R 语言:
library("KEGGREST")

# KEGG entry of the pathway whose gene symbols are exported.
path_way <- "hsa04010"

# Fetch the pathway record once; every keggGet() call is a network request.
gs <- keggGet(path_way)

# GENE field of the first (only) returned record, shown for inspection.
gene_field <- gs[[1]]$GENE
gene_field

#' Extract gene symbols from the GENE field of a KEGG pathway record.
#'
#' @param gene_field Character vector that alternates between a gene id and
#'   a "SYMBOL; description" string (the layout the original mod-3 indexing
#'   relied on).
#' @return Character vector of gene symbols, in the order they appear.
extract_gene_symbols <- function(gene_field) {
  # Even positions hold the "SYMBOL; description" strings.
  descriptions <- gene_field[seq_along(gene_field) %% 2 == 0]
  # Entries without ';' carry no symbol. Dropping them here, instead of
  # counting split pieces in threes, keeps one odd entry from shifting
  # every symbol that follows it.
  has_symbol <- grepl(";", descriptions, fixed = TRUE)
  # Keep only the text before the first ';', so a ';' inside the
  # description cannot create spurious pieces.
  trimws(sub(";.*$", "", descriptions[has_symbol]))
}

# One-column table; the column name "genelist" becomes the CSV header.
genelist <- data.frame(genelist = extract_gene_symbols(gene_field))

# Output directory (must already exist).
path <- "~/Desktop/paper/data/pathway/"

# Write the gene symbols to a CSV file without row names.
write.csv(genelist, file = paste0(path, "genelist.csv"), row.names = FALSE)
获得各通路的 CSV 文件后,使用 Python 进行合并和去重:
import pandas as pd

# Directory holding the per-pathway gene-list CSV files and the outputs.
PATHWAY_DIR = '/Users/yaodashuai/Desktop/paper/data/pathway/'
# Per-pathway gene lists to combine; only the first column of each is used.
PATHWAY_FILES = ['cell_cycle.csv', 'RTK-RAS.csv', 'p53.csv', 'pi3k.csv']


def merge_first_columns(frames, column='genelist'):
    """Stack the first column of every frame into a one-column DataFrame.

    Args:
        frames: iterable of DataFrames; only column 0 of each is read.
        column: name given to the single output column.

    Returns:
        DataFrame with one column named ``column`` and a fresh 0..n-1 index.
    """
    stacked = pd.concat([frame.iloc[:, 0] for frame in frames],
                        ignore_index=True)
    # Rename before converting: pd.DataFrame(series, columns=[...]) selects
    # by the Series name and yields an all-NaN column when the name differs.
    return stacked.rename(column).to_frame()


def drop_duplicate_genes(merged, column='genelist'):
    """Return ``merged[[column]]`` keeping the first occurrence of each value."""
    return merged[[column]].drop_duplicates()


def merge_pathway_gene_lists():
    """Merge the per-pathway gene lists and write merged and unique CSVs."""
    frames = [pd.read_csv(PATHWAY_DIR + name) for name in PATHWAY_FILES]

    # Save the concatenated (still duplicated) gene list.
    df_merged = merge_first_columns(frames)
    df_merged.to_csv(PATHWAY_DIR + 'merged.csv', index=False)

    # Deduplicate in memory; re-reading merged.csv first is unnecessary.
    df_unique = drop_duplicate_genes(df_merged)
    df_unique.to_csv(PATHWAY_DIR + 'quchong_data.csv', index=False)


if __name__ == '__main__':
    merge_pathway_gene_lists()
import pandas as pd

# Deduplicated pathway gene list (gene names in the first column).
GENE_LIST_CSV = '/Users/yaodashuai/Desktop/paper/data/pathway/quchong_data.csv'
# Gene expression profile (gene names assumed to be in the first column).
EXPRESSION_CSV = '/Users/yaodashuai/Desktop/paper/data/data_log/ov.csv'
# Destination for the expression rows of the pathway genes.
PATHWAY_EXPRESSION_CSV = '/Users/yaodashuai/Desktop/paper/data/test/pathway.csv'


def filter_expression_by_genes(gene_expression_data, gene_names):
    """Keep the rows whose first-column value occurs in ``gene_names``.

    Args:
        gene_expression_data: DataFrame whose first column holds gene names.
        gene_names: collection of gene names to keep.

    Returns:
        The matching rows, in their original order, with all columns.
    """
    gene_column = gene_expression_data.iloc[:, 0]
    return gene_expression_data[gene_column.isin(gene_names)]


def extract_pathway_expression():
    """Write the expression rows of the pathway genes to a new CSV file."""
    # Read the gene list and take its first column as the gene names.
    path_genes = pd.read_csv(GENE_LIST_CSV)
    gene_names = path_genes.iloc[:, 0]

    # Read the expression profile and keep only the pathway genes. This is
    # a membership filter; no differential-expression test is performed.
    gene_expression_data = pd.read_csv(EXPRESSION_CSV)
    pathway_expression = filter_expression_by_genes(gene_expression_data,
                                                    gene_names)

    pathway_expression.to_csv(PATHWAY_EXPRESSION_CSV, index=False)


if __name__ == '__main__':
    extract_pathway_expression()