安装包
library( clusterProfiler)
library( tidyr)
library( "stringr" )
library( topGO)
library( ggplot2)
BiocManager:: install( 'topGO' )
BiocManager:: install( 'Rgraphviz' )
GO数据准备
go_id<- read.table( "swiss_go.out" , sep = "\t" , quote = "" )
go_idn<- separate_rows( go_id, V2, sep = ',' )
go_term2gene<- data.frame( go_idn$ V2, go_idn$ V1)
names( go_term2gene) <- c( "go_term" , "gene" )
go_name<- go2term( go_idn$ V2)
go_term2name<- data.frame( go_name$ go_id, go_name$ Term)
names( go_term2name) <- c( "go_term" , "name" )
准备间接注释
go_all_term2gene<- buildGOmap( go_term2gene)
KEGG数据准备
kegg_id<- read.table( "kegg.out" , sep = "\t" , quote = "" )
names( kegg_id) <- c( "ID" , "kegg" )
准备ID与KEGG Pathway的对应信息
kegg_path<- bitr_kegg( kegg_id$ kegg, "kegg" , "Path" , "ko" )
kegg_id2path<- merge( kegg_id, kegg_path)
kegg_term2gene<- data.frame( kegg_id2path$ Path, kegg_id2path$ ID)
names( kegg_term2gene) <- c( "ko_term" , "gene" )
GO_KEGG_ERH<- function ( gene_file_name) {
in_file_path= "cluster_profiler_2"
out_file_path= "cluster_profiler_2"
file_tax<- str_match( gene_file_name, "(.*\\.)" ) [ , 2 ]
file_tax
gene<- read.table( paste( in_file_path, gene_file_name, sep = "\\" ) )
gene<- as.factor( gene$ V1)
go_enrich_direct<- enricher( gene = gene, pvalueCutoff = 0.05 , pAdjustMethod = "BH" ,
TERM2GENE = go_term2gene, TERM2NAME = go_term2name)
go_enrich_indirect<- enricher( gene= gene, pvalueCutoff= 0.05 , pAdjustMethod= "BH" ,
TERM2GENE= go_all_term2gene, TERM2NAME= go_term2name)
输出仅直接注释的结果
go_erh<- as.data.frame( go_enrich_direct)
go_ont<- go2ont( go_erh$ ID)
names( go_ont) <- c( "ID" , "Ontology" )
go_out_dt<- merge( go_erh, go_ont)
go_out_dt<- go_out_dt[ order( go_out_dt$ Ontology, go_out_dt$ qvalue) , ]
write.csv( go_out_dt, paste( out_file_path, "\\GO_dct_" , file_tax, "csv" , sep = "" ) )
输出包括间接注释结果
go_erh_i<- as.data.frame( go_enrich_indirect)
go_ont_i<- go2ont( go_erh_i$ ID)
names( go_ont_i) <- c( "ID" , "Ontology" )
go_out_idt<- merge( go_erh_i, go_ont_i)
go_term_i<- go2term( go_erh_i$ ID)
names( go_term_i) <- c( "ID" , "Term" )
go_out_idt<- merge( go_out_idt, go_term_i)
go_out_idt<- cbind( go_out_idt$ ID, go_out_idt[ , 11 ] , go_out_idt[ , 3 : 10 ] )
names( go_out_idt) [ 1 : 2 ] <- c( "ID" , "Description" )
go_out_idt<- go_out_idt[ order( go_out_idt$ Ontology, go_out_idt$ qvalue) , ]
write.csv( go_out_idt, paste( "GO_indct_" , file_tax, "csv" , sep = "" ) , row.names = F)
kegg_enrich<- enricher( gene = gene, pvalueCutoff = 0.05 , pAdjustMethod = "BH" ,
TERM2GENE = kegg_term2gene)
输出结果
kegg_erh<- as.data.frame( kegg_enrich)
kegg_name<- ko2name( kegg_erh$ ID)
names( kegg_name) <- c( "ID" , "koname" )
kegg_out<- merge( kegg_erh, kegg_name)
kegg_out<- cbind( kegg_out$ ID, kegg_out[ , 10 ] , kegg_out[ , 3 : 9 ] )
names( kegg_out) [ 1 : 2 ] <- c( "ID" , "Description" )
write.csv( kegg_out, paste( "KEGG_" , file_tax, "csv" , sep = "" ) )
}
GO_KEGG_ERH( "geneID_F_VS_X_up.csv" )
GO条形图
slimgo = read.table( "tableS3.csv" , header= T, sep= "," )
slimgo = slimgo[ order( slimgo$ Ontology, slimgo$ p.adjust) , ]
slimgo$ Description= factor( slimgo$ Description, levels= slimgo$ Description)
pp= ggplot( slimgo, aes( x= Description, y= Count, fill= Ontology) )
pp+ geom_bar( stat= "identity" )
pbar= pp+ geom_bar( stat= "identity" ) + coord_flip( ) + scale_x_discrete( limits= rev( levels( slimgo$ term_name) ) )
pr= pbar+ scale_fill_discrete( name= "Ontology" , breaks= c( "GO:MF" , "GO:BP" , "GO:CC" ) ) + ylab( "Count" ) + xlab( "GO Term" )
pr+ theme_bw( )
KEGG气泡图
pathway= head( read.csv( "KEGG_geneID_X_VS_F_up.csv" , header= T) , n= 50 )
pp = ggplot( pathway, aes( Count, Description) )
pp + geom_point( )
pp + geom_point( aes( size= Count) )
pbubble = pp + geom_point( aes( size= Count, color= - 1 * log10( x = p.adjust) ) )
pbubble + scale_colour_gradient( low= "green" , high= "red" )
pr = pbubble + scale_colour_gradient( low= "green" , high= "red" ) + labs( color= expression( - log[ 10 ] ( p.adjust) ) , size= "Gene number" , x= "Count" , y= "Pathway name" , title= "Significantly pathway enrichment" )
pr + theme_bw( )
画饼图
BUSCO<- data.frame(
type<- c( "Complete and single-copy BUSCOs (S)" ,
"Complete and duplicated BUSCOs (D)" ,
"Fragmented BUSCOs (F)" ,
"Missing BUSCOs (M)" ) ,
value<- c( 115 , 119 , 21 , 0 )
)
bu<- ggplot( BUSCO, aes( x= "" , y= value, fill= type) ) + geom_bar( width = 1 , stat = "identity" )
bu
pie<- bu+ coord_polar( "y" , start = 0 ) + theme_void( )
pie
pie+ cowplot:: theme_cowplot( )
ORF
ORF<- data.frame(
group<- c( "assembled unigenes don't contain ORFs" ,
"assembled unigenes contain only one ORF" ,
"assembled unigenes contain more than one ORF" ) ,
value<- c( 6781 , 919 , 930 )
)
bp<- ggplot( ORF, aes( x= "" , y= value, fill= group) ) + geom_bar( width = 1 , stat = "identity" )
bp
pie<- bp+ coord_polar( "y" , start= 0 ) + theme_void( )
pie+ cowplot:: theme_cowplot( )
画韦恩图
install.packages( "VennDiagram" )
library( grid)
library( futile.logger)
library( VennDiagram)
venn.plot<- draw.quad.venn(
area1 = 63490 ,
area2 = 108439 ,
area3 = 35526 ,
area4 = 111065 ,
n12 = 49523 ,
n13 = 11988 ,
n14 = 50531 ,
n23 = 33682 ,
n24 = 108439 ,
n34 = 33885 ,
n123 = 11931 ,
n124 = 49523 ,
n134 = 11975 ,
n234 = 33682 ,
n1234 = 11931 ,
category = c( "COGs" , "GO" , "KEGG" , "Swiss-Prot" ) ,
fill = c( "orange" , "red" , "green" , "blue" ) ,
lty = "dashed" ,
cex = 1.4 ,
cat.cex = 2 ,
cat.col = c( "orange" , "red" , "green" , "blue" )
) ;
grid.draw( venn.plot)
画COG注释图
m<- read.csv( "COG_result.txt" , header = T, sep = "\t" )
m<- m[ , - 1 ]
layout( matrix( c( 1 , 2 ) , nrow = 1 ) , widths = c( 20 , 13 ) )
layout.show( 2 )
par( mar= c( 3 , 4 , 4 , 1 ) + 0.1 )
class<- c( "A" , "B" , "C" , "D" , "E" , "F" , "G" , "H" , "I" , "J" , "K" , "L" ,
"M" , "N" , "O" , "P" , "Q" , "R" , "S" , "T" , "U" , "V" , "W" , "X" , "Y" , "Z" )
t<- factor( as.character( m$ Code) , levels = class)
m[ order( t) , ]
m<- m[ order( t) , ]
barplot( m$ unigenes, space = F, col = rainbow( 26 ) , ylab = "Number of genes" ,
names.arg = m$ Code)
par( mar= c( 2 , 0 , 2 , 1 ) + 0.1 )
plot( 0 , 0 , type = "n" , xlim = c( 0 , 1 ) , ylim = c( 0 , 26 ) ,
bty= "n" , axes = F, xlab = "" , ylab = "" )
for ( i in 1 : length( class) ) {
text( 0 , 26 - i+ 0.5 , paste( m$ Code[ i] , m$ Functional.Categories[ i] ) ,
pos = 4 , cex = 1 , pty= T)
}
draw CDS
CDS.plot<- draw.quad.venn(
area1 = 7830 ,
area2 = 1153 ,
area3 = 103749 ,
area4 = 93919 ,
n12 = 110 ,
n13 = 10449 ,
n14 = 919 ,
n23 = 7 ,
n24 = 30 ,
n34 = 93919 ,
n123 = 440 ,
n124 = 1110 ,
n134 = 9379 ,
n234 = 10 ,
n1234 = 0 ,
category = c( "assembled unigenes" , "total ORFs" , "assembled unigenes" , "unigenes contained only one ORF" ) ,
fill = rainbow( 4 ) ,
lty = "dashed" ,
cex = 1.4 ,
cat.cex = 1 ,
cat.col = rainbow( 4 )
) ;
grid.draw( CDS.plot)