图片
话不多说,先上一波效果图
代码
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 22 08:33:24 2021
@author: dujidan
"""
import os
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
plt.style.use("ggplot")
# Work around Chinese text rendering problems: select the SimHei font and
# turn off the Unicode minus sign for axis tick labels.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
def read_column_float_file(input_file, start_row=0, n_col='all'):
    """Read numeric values from a tab-separated text file.

    Args:
        input_file: Path of a UTF-8 encoded, tab-separated text file.
        start_row: Index of the first line parsed as data; pass 1 to skip
            a single header line.
        n_col: ``'all'`` to parse every data line into a list of floats,
            or an integer column index to collect only that column.

    Returns:
        A ``(values, header)`` tuple. With ``n_col='all'``, ``values`` holds
        one list of floats per data line; otherwise it is a flat list of
        floats taken from column ``n_col``. ``header`` is always the first
        line of the file split on tabs, whatever ``start_row`` is. Empty
        cells are skipped in both modes.

    Raises:
        ValueError: If a non-empty cell cannot be converted to ``float``.
    """
    # Read everything first so the file handle is released before parsing.
    with open(input_file, encoding='utf-8') as f_in:
        data_list = f_in.read().strip('\n').split('\n')

    header = data_list[0].split('\t')
    rows = [line.split('\t') for line in data_list[start_row:]]

    if n_col == 'all':
        values = [[float(cell) for cell in row if cell != ''] for row in rows]
        return values, header

    out_list = []
    for row in rows:
        try:
            cell = row[n_col]
        except IndexError:
            # A blank line or a row with fewer fields than requested has no
            # value in this column; treat it like an empty cell instead of
            # aborting the whole read.
            continue
        if cell != '':
            out_list.append(float(cell))
    return out_list, header
# Box plots: one before/after comparison per panel, all panels in one figure.
def boxplot_2D_inOne(list_1, list_2, out_file='./boxplot_2D.png', suptitle_name='Box plot ', nrow=5, ncol=8):
    """Draw a grid of paired box plots and save it as an image.

    Panel ``i`` compares ``list_1[i]`` (tick label "before") with
    ``list_2[i]`` (tick label "after").

    Args:
        list_1: Sequence of 1-D samples, one per panel.
        list_2: Sequence of 1-D samples; must provide an entry for every
            index of ``list_1``.
        out_file: Path of the image to write, e.g. ``/home/test.png``.
        suptitle_name: Title placed above the whole figure.
        nrow: Number of panel rows in the grid.
        ncol: Number of panel columns in the grid.
    """
    fig = plt.figure(figsize=(50, 30))
    try:
        for i, before in enumerate(list_1):
            ax = plt.subplot(nrow, ncol, i + 1)
            # ``labels`` only names the x ticks; it creates no legend
            # entries, so no legend is requested here.
            ax.boxplot((before, list_2[i]), labels=('before', 'after'))
        fig.suptitle(suptitle_name, fontsize=60)
        fig.savefig(out_file, dpi=500)
    finally:
        # Release the figure so it is not leaked and so later plt.* calls
        # do not keep drawing onto this grid.
        plt.close(fig)
# Density plots: one before/after comparison per panel, all panels in one figure.
def distplot_2D_inOne(list_1, list_2, out_file='./doxplot_2D.png', suptitle_name='Dist plot', nrow=5, ncol=8):
    """Draw a grid of overlaid histogram + KDE plots and save it as an image.

    Panel ``i`` overlays the distribution of ``list_1[i]`` (legend entry
    "before") and ``list_2[i]`` (legend entry "after").

    Args:
        list_1: Sequence of 1-D samples, one per panel.
        list_2: Sequence of 1-D samples; must provide an entry for every
            index of ``list_1``.
        out_file: Path of the image to write.
        suptitle_name: Title placed above the whole figure.
        nrow: Number of panel rows in the grid.
        ncol: Number of panel columns in the grid.
    """
    fig = plt.figure(figsize=(50, 30))
    try:
        for i, before in enumerate(list_1):
            ax = plt.subplot(nrow, ncol, i + 1)
            # NOTE(review): seaborn marks distplot as deprecated; it is kept
            # so the output stays the same on the seaborn version in use.
            sns.distplot(before, kde=True, hist=True, label='before', ax=ax)
            sns.distplot(list_2[i], kde=True, hist=True, label='after', ax=ax)
            ax.legend()
            ax.set_xlabel('深度')
            ax.set_ylabel('密度曲线')
        fig.suptitle(suptitle_name, fontsize=60)
        fig.savefig(out_file, dpi=500)
    finally:
        # Release the figure so it is not leaked and so later plt.* calls
        # do not keep drawing onto this grid.
        plt.close(fig)
def read_float_file(input_file, start_row=0):
    """Read every row of a tab-separated text file as a list of floats.

    Args:
        input_file: Path of a UTF-8 encoded, tab-separated text file.
        start_row: Index of the first line parsed as data; pass 1 to skip
            a single header line.

    Returns:
        A ``(rows, header_line)`` tuple. ``rows`` holds one list of floats
        per data line, with empty cells skipped. ``header_line`` is the
        first line of the file as a single, unsplit string.

    Raises:
        ValueError: If a non-empty cell cannot be converted to ``float``.
    """
    # Read everything first so the file handle is released before parsing.
    with open(input_file, encoding='utf-8') as f_in:
        data_list = f_in.read().strip().split('\n')

    out_list = [
        [float(cell) for cell in line.split('\t') if cell != '']
        for line in data_list[start_row:]
    ]
    # NOTE(review): read_column_float_file returns its header split on tabs,
    # while this function returns the raw line. Kept as-is for existing
    # callers; confirm which form is intended before indexing the result.
    return out_list, data_list[0]
def plot_percent_line(all_list, index_list):
    """Plot, per series, the share of values at or above each threshold.

    For every threshold 0.0, 0.1, ..., 0.9 the fraction of values in a
    series that are ``>=`` the threshold is computed, drawn as a line with
    markers, annotated on the plot and printed to stdout. The figure is
    saved to ``得分统计分布-累计图.png`` in the current directory.

    Args:
        all_list: 2-D list; each inner list is one series of scores.
        index_list: Legend names; ``index_list[i]`` labels ``all_list[i]``.

    Raises:
        ValueError: If a series is empty, because its share is undefined.
    """
    # Build the thresholds as i / 10: np.arange(0, 1, 0.1) accumulates
    # rounding error (e.g. 0.30000000000000004), which made a score of
    # exactly 0.3 fall below the "0.3" threshold.
    x_ticks = np.arange(10) / 10

    fig, ax = plt.subplots()
    try:
        for series_idx, a_list in enumerate(all_list):
            total = len(a_list)
            if total == 0:
                raise ValueError(
                    'series %d is empty; cannot compute a share' % series_idx
                )
            ret_list = [
                sum(1 for value in a_list if value >= threshold) / total
                for threshold in x_ticks
            ]
            ax.plot(x_ticks, ret_list, 'o-', label=index_list[series_idx])
            for threshold, share in zip(x_ticks, ret_list):
                ax.annotate(str(share), (threshold, share))
                print(threshold, share)

        ax.set_xticks(x_ticks)
        ax.legend()
        ax.set_title('score 累计趋势')
        ax.set_xlabel(' 得分')
        ax.set_ylabel('> 得分 的位点占比')
        fig.savefig('得分统计分布-累计图.png', dpi=500)
    finally:
        # Release the figure so it is not leaked between calls.
        plt.close(fig)
if __name__ == "__main__":
depth_file_20 = '202012_all_depth.xls'
depth_file_21 = '202101_all_depth.xls'
list_1, __ = read_column_float_file(depth_file_20, start_row=1, n_col='all')
list_2, __ = read_column_float_file(depth_file_21, start_row=1, n_col='all')
distplot_2D_inOne(list_1, list_2, out_file='./doxplot_2D.png', suptitle_name='Dist plot', nrow=5, ncol=8)
boxplot_2D_inOne(list_1, list_2, out_file='./boxplot_2D.png', suptitle_name='Box plot ', nrow=5, ncol=8)
# score
score_file = 'score.xls'
a_list, __ = read_column_float_file(score_file, start_row=1, n_col=0)
b_list, __ = read_column_float_file(score_file, start_row=1, n_col=1)
c_list, __ = read_column_float_file(score_file, start_row=1, n_col=2)
d_list, index_list = read_column_float_file(score_file, start_row=1, n_col=3)
plt.boxplot((a_list, b_list, c_list, d_list), labels=(index_list))
plt.title('得分统计分布-箱线图')
plt.savefig('得分统计分布-箱线图.png', dpi=500)
plt.title('得分统计分布-密度图')
sns.distplot(a_list, label=index_list[0])
sns.distplot(b_list, label=index_list[1])
sns.distplot(c_list, label=index_list[2])
sns.distplot(d_list, label=index_list[3])
plt.legend()
plt.savefig('得分统计分布-密度图.png', dpi=500)
all_list = [a_list, b_list, c_list, d_list]
plot_percent_line(all_list, index_list