matplotlib 画图笔记柱状图、曲线图、box图

风风是超人

已于 2023-10-27 11:12:51 修改

阅读量443

点赞数

文章标签： matplotlib 笔记 python

于 2021-02-07 17:14:14 首次发布

本文链接：https://blog.csdn.net/qq_42962326/article/details/113743647

版权

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.backends.backend_pdf import PdfPages
import pandas as pd

GCcounttxt 文件，5列，“chrom”,“start”,“end”,“区间内gc比例”,“reads”
在这里插入图片描述
bed文件

画柱状图，曲线图，以及注释

def draw_pic(GCcounttxt):
	# GCcounttxt 5列，"chrom","start","end","gc","reads"
    sampleid = GCcounttxt.split(".")[0]
    data = pd.read_table(GCcounttxt, names=["chrom","start","end","gc","reads"])

    datasort = data.sort_values(by=["gc"])

    gc = list(datasort["gc"])
    reads = list(datasort["reads"])
    bin_num = np.arange(len(datasort))
#############################################################

    annobed = pd.read_table("anno.bed", names=["chrom","start","end","gene"])
    mergedata = pd.merge(datasort, annobed, on=["chrom","start","end"])
    mergedata["gene"] = mergedata["gene"].apply(lambda x:x.split(",")[0])   #只保留第一个基因
    mergedata = mergedata.loc[mergedata["gene"] !="-"]

    group_gene = mergedata.groupby('gene')

    dict1={"gene":[], "gc_gene":[], "read_gene":[], "gene_len":[]}
    for gene , group in group_gene:
        gc_gene = group["gc"].mean()
        read_gene = group["reads"].sum()
        group["bed_len"] = group["end"]- group["start"]
        gene_len = group["bed_len"].sum()
        read_gene2 = read_gene / gene_len   # 标准化基因覆盖的reads
        dict1["gene"].append(gene)
        dict1["gc_gene"].append(gc_gene)
        dict1["read_gene"].append(read_gene2)
        dict1["gene_len"].append(gene_len)
    gene_frame = pd.DataFrame(dict1)
    gene_frame = gene_frame.sort_values(by=["gc_gene"]) # 按照基因gc含量排序
    gene_frame = gene_frame.reset_index(drop=True)
    out = "%s.gene.txt"%sampleid
    gene_frame.to_csv(out,sep="\t",index=False)
###############################################################

    gene_num = np.arange(len(gene_frame["gene"]))
    gc = list(gene_frame["gc_gene"])
    reads = list(gene_frame["read_gene"])


    CEBPA = gene_frame.loc[gene_frame["gene"]=="CEBPA"]
    index1 = list(CEBPA.index)[0] #CEBPA 基因横坐标
    gc1 = list(CEBPA["gc_gene"])[0] # gc含量 ，左y轴坐标
    read_gene1 = list(CEBPA["read_gene"])[0] # reads数 ，右y轴坐标
    
    NPM1 = gene_frame.loc[gene_frame["gene"]=="NPM1"]
    index2 = list(NPM1.index)[0]
    gc2 = list(NPM1["gc_gene"])[0]
    read_gene2 = list(NPM1["read_gene"])[0]

    print(NPM1)
    print(CEBPA)
    with PdfPages("%s.pdf"%sampleid) as pdf:
        fig,ax1 = plt.subplots(figsize=(21,10))
        plt.title(sampleid)
        ax1.plot(gene_num, gc,alpha=1, color="g", linestyle='-', linewidth=3.0)   # alpha 透明度，color 柱子颜色；linestyle 线型 ，linewidth 线粗细, s= 10 点的大小
        ax1.set_xlabel('gene', size =18)  # x轴标签
        ax1.set_ylabel('gc(%)', color = 'g',size=18) # y轴标签
        plt.annotate("CEBPA\n gc:%.4f"%gc1 ,
                     xy=(index1, gc1),
                     xytext=(index1-1000, 0.8), 
                     weight="bold", 
                     color='r',
                     size='15',
                     arrowprops=dict(arrowstyle="->",connectionstyle="arc3",color="r"))
       # 对所选基因注释，在图中标出基因信息  xy为 基因横纵坐标位置 ，xytext 为注释的文本信息显示位置坐标，weight  设置字体线型 ,arrowprops dict格式，设置指向箭头的参数，字典中key值有①arrowstyle：设置箭头的样式，其value候选项如’->’,’|-|’,’-|>’,也可以用字符串’simple’,‘fancy’等 ； ②connectionstyle：设置箭头的形状，为直线或者曲线，候选项有’arc3’,‘arc’,‘angle’,‘angle3’，可以防止箭头被曲线内容遮挡
        
        plt.annotate("NPM1\n gc:%.4f"%gc2 ,
                     xy=(index2, gc2),
                     xytext=(index2+1000, 0.6), 
                     weight="bold", 
                     color='r',
                     size='15',
                     arrowprops=dict(arrowstyle="->",connectionstyle="arc3",color="r"))
    
    
        ax2 = ax1.twinx()  # 设置第二坐标轴 twinx 设置第二纵坐标 twiny设置第二横坐标。 这里貌似必须先注释完第一个后才能写， 如果在注释gc前写入，注释时写入的坐标是以第二坐标轴为准的。所以以第一y轴为准的注释应该先注释，然后再写ax1.twinx()

        ax2.scatter(gene_num, reads, alpha=0.4, color="b", linestyle='--', linewidth=1.0)
        ax2.set_ylim(0,10)  #  y轴值 区间
        ax2.set_ylabel('read_count', color = 'b',size=18)

        plt.annotate("CEBPA\nread:%.4f"%read_gene1 ,
                     xy=(index1, read_gene1),
                     xytext=(index1-3000, 0.3), 
                     weight="bold", 
                     color='r',
                     size='15',
                     arrowprops=dict(arrowstyle="->",connectionstyle="arc3",color="r"))
        
        plt.annotate("NPM1\nread:%.4f"%read_gene2 ,
                     xy=(index2, read_gene2),
                     xytext=(index2+1000, 1), 
                     weight="bold", 
                     color='r',
                     size='15',
                     arrowprops=dict(arrowstyle="->",connectionstyle="arc3",color="r"))
    
    
    
        plt.show()
        pdf.savefig(fig)  # 保存图片在pdf
        plt.close()

 draw_pic(GCcounttxt)

在这里插入图片描述

box图

import seaborn as sns
import matplotlib.pyplot as plt

fig1=plt.figure(figsize=(24, 10))
for i in range(6):
    i+=1
    frame=pd.read_table(f'Region{i}.xls') # frame包含需要画图的特征列，以及特征的分组列
    plt.subplot(2,3,i) # 画布上子图为2行3列，6个小图，i为指定的第i个子图
    plt.title(f'Region{i}')
    sns.boxplot(x='clinic_label',y="Methylation Level",data=frame,width=0.3) # x为分类标签，y为画图的特征值
    sns.swarmplot(x='clinic_label',y="Methylation Level", data=df, color="grey",size=4) # 在Box图中添加散点
    #frame.to_csv('Region1.xls',index=False,sep='\t')
plt.tight_layout()
plt.show()

在这里插入图片描述
如果将多个fig1保存为pdf

figlists=[]
将fig1保存在figlists
with PdfPages("result.pdf") as pdf:
    for fig in figlists:
        pdf.savefig(fig)

画显著性比较

frame格式如图
在这里插入图片描述

from statannotations.Annotator import Annotator

for i in ['VIM_dCT','ONECUT2_dCT','TERT-M1_dCT','FGFR3-M1_dCT','TERT-M2_dCT','FGFR3-M2_dCT']:
    fig1=plt.figure(figsize=(20, 10))
    plt.title(i, fontsize=18)
    ax = sns.boxplot(x='label',y=i,data=concatframe,width=0.3)
    sns.swarmplot(x='label',y=i, data=concatframe, color="grey",size=4)
    plt.ylabel(i, size =18)
    #plt.xticks(fontsize=17)
    plt.xlabel('')
    plt.axhline(y=5, color='r', linestyle='--') # 在box图上y=5处画一条横线
    pairs=[("Case", "Benign_disease"), ("Case", "Interference"), ("Benign_disease", "Interference")]
    annotator = Annotator(ax, pairs, data=concatframe, x='label', y=i)  # x为标签列，y为数值列
    annotator.configure(test='Mann-Whitney', text_format='star',line_height=0.03,line_width=1)
    annotator.apply_and_annotate()
    
    plt.show()

在这里插入图片描述

风风是超人

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
打赏
1
评论
matplotlib 画图笔记柱状图、曲线图、box图

import matplotlib.pyplot as pltimport numpy as npfrom matplotlib.backends.backend_pdf import PdfPagesimport pandas as pdGCcounttxt 文件，5列，“chrom”,“start”,“end”,“区间内gc比例”,“reads”bed文件画柱状图，曲线图，以及注释def draw_pic(GCcounttxt): # GCcounttxt 5列，"chrom","
复制链接

扫一扫