import pandas as pd
import matplotlib.pyplot as plt
# Load the second-hand housing listings; the later tasks reuse `data`.
data = pd.read_csv('SecondhandHouse_view.csv')
# SimHei provides the glyphs for the Chinese labels; with unicode_minus off,
# tick labels use the ASCII hyphen instead of the Unicode minus sign.
plt.rcParams.update({'font.sans-serif': 'SimHei', 'axes.unicode_minus': False})

# Task 1: scatter plot of floor area (x) against total price (y).
fig, ax = plt.subplots()
ax.scatter(data['建筑面积'], data['总价'])
ax.set_xlabel('建筑面积(平米)')
ax.set_ylabel('总价(万)')
ax.set_title('二手房建筑面积与房价的关系分析')
fig.savefig('二手房建筑面积与房价的关系分析.png')
plt.show()
# Line chart of total price against floor area.
plt.rcParams['font.sans-serif'] = 'SimHei'
# A line connects points in the order given. The CSV rows are not
# guaranteed to be ordered by area, so sort first; otherwise the line
# jumps back and forth across the axes.
by_area = data.sort_values('建筑面积')
plt.plot(by_area['建筑面积'], by_area['总价'])
plt.xlabel('建筑面积(平米)')
plt.ylabel('总价(万)')
plt.title('二手房建筑面积与房价的关系分析')
plt.savefig('二手房建筑面积与房价的关系分析折线图1.png')
plt.show()
# Pie chart: one wedge per listing, sized by its total price and labelled
# with that listing's floor area.
plt.rcParams['font.sans-serif'] = 'SimHei'
pie_fig, pie_ax = plt.subplots()
pie_ax.pie(data['总价'], labels=data['建筑面积'], autopct='%1.1f%%')
pie_ax.set_title('二手房建筑面积与房价的关系分析')
pie_fig.savefig('二手房建筑面积与房价的关系分析饼状图2.png')
plt.show()
# Box plot of the total-price distribution.
plt.rcParams['font.sans-serif'] = 'SimHei'
plt.boxplot(data['总价'])
# Name the single box (drawn at x=1) through its tick label. boxplot's
# `labels` keyword is deprecated since Matplotlib 3.9, while setting the
# tick label works on every version.
plt.xticks([1], ['总价'])
# NOTE(review): the x label mentions floor area although only the price
# column is plotted -- confirm the intended wording.
plt.xlabel('建筑面积(平米)')
plt.ylabel('总价(万)')
plt.title('二手房建筑面积与房价的关系分析')
plt.savefig('二手房建筑面积与房价的关系分析箱装图3.png')
plt.show()
# Task 2: bin the total price into ten grades (1-10) and draw a bar chart of
# how many listings fall into each grade.
price_edges = [0, 50, 65, 80, 95, 110, 125, 140, 155, 170, float('inf')]
data['等级'] = pd.cut(data['总价'], price_edges, labels=list(range(1, 11)))
grade_counts = data['等级'].value_counts().sort_index()
# A Series has no x/y columns to choose from, so only `kind` is passed.
grade_counts.plot(kind='bar')
# pandas draws the bars at positions 0..9 and already labels them with the
# grade numbers; only rotate those labels. Moving the ticks to 1..10 would
# shift every label one bar to the right.
plt.xticks(rotation=45)
plt.xlabel('等级')
plt.ylabel('二手房出售数量')
plt.savefig('柱状图.png')
plt.show()
# Task 3: combined chart per value of the '区域' column -- mean total price
# as bars on the left axis, number of listings as a line on the right axis.
# The two series differ too much in magnitude to share one y axis, hence
# the twin axes with separately coloured labels.
price_by_region = data.groupby('区域')['总价']
average_prices = price_by_region.mean()
sale_quantities = price_by_region.count()
from matplotlib.font_manager import FontProperties

fig, ax1 = plt.subplots()
plt.rcParams.update({'font.sans-serif': 'SimHei', 'axes.unicode_minus': False})

# Left axis: mean price as translucent bars.
ax1.bar(average_prices.index, average_prices, alpha=0.5)
ax1.set_ylabel('二手房平均价', color='blue')
ax1.tick_params(axis='y', labelcolor='blue')

# Right axis, sharing the x axis with ax1: listing count as a red line.
ax2 = ax1.twinx()
ax2.plot(sale_quantities.index, sale_quantities, color='red', marker='o')
ax2.set_ylabel('二手房出售数量', color='red')
ax2.tick_params(axis='y', labelcolor='red')

# One x tick per group, labelled with the group name.
region_names = average_prices.index
plt.xticks(range(len(region_names)), region_names)

ax1.legend(['平均价'], loc='upper left')
ax2.legend(['出售数量'], loc='upper right')
plt.show()
import numpy as np
import matplotlib.pyplot as plt
# Read the header names and the value table from the .npz archive. The
# context manager closes the underlying file once both arrays are read.
# NOTE: allow_pickle=True lets np.load unpickle object arrays, which can run
# arbitrary code -- only use it on archives from a trusted source.
with np.load('国民经济核算季度数据.npz', allow_pickle=True) as data:
    columns = data['columns']  # column headers
    values = data['values']    # the data matching those headers
print(columns)
print(values)
print(values.shape)
import matplotlib.pyplot as plt
import matplotlib as mpl
import random
"""
常见的可视化形式:
1,统计图:直方图、折线图、饼图
2,分布图:热力图、散点图、气泡图
数据可视化工具:
1,分析工具:pandas,SciPy , numpy , sklearn
2,绘图工具:matplotlib, Pychart, reportlab
3,平台工具:Jupyter Notebook, PyCharm
"""
# Two-point sample series shared by the two charts below.
x = [1, 2]
y = [-3, 4]
plt.rcParams['font.sans-serif'] = ['SimHei']  # font with glyphs for the Chinese titles
plt.rcParams['axes.unicode_minus'] = False
plt.title('柱状图')
plt.bar(x, y)
plt.show()
# The title announces a dashed-line chart, so ask for the dashed style on
# this call instead of changing the global 'lines.linestyle' setting.
plt.title('虚线图')
plt.plot(x, y, linestyle='--')
plt.show()
# Sample body heights.
height1 = [168, 155, 160, 143, 170, 160, 193, 170, 190, 160, 143, 170, 160, 193, 170, 190]
# 5-unit bins starting at 110. The last edge is rounded up past the largest
# sample so that no value (e.g. 193) falls outside the bins and is dropped.
bin_width = 5
upper_edge = (max(height1) // bin_width + 1) * bin_width
bins1 = range(110, upper_edge + 1, bin_width)
plt.title('直方图')
plt.hist(height1, bins=bins1)
plt.show()
# Sample data: one score per class.
classes = ['c1', 'c2', 'c3']
score = [70, 90, 88]
# Titled, labelled bar chart of the scores.
fig, ax = plt.subplots()
ax.set_title('条形图')
ax.set_xlabel('班级')
ax.set_ylabel('成绩')
ax.bar(classes, score)
plt.show()
# Sample data: one height value per year from 2005 to 2019.
year1 = range(2005, 2020)
height2 = [168, 155, 160, 143, 170, 160, 193, 170, 190, 160, 143, 170, 160, 193, 170]
fig, ax = plt.subplots()
ax.set_title('折线图')
ax.plot(year1, height2)
plt.show()
# Sample data: spending per category.
labels = ['房贷', '购车', '教育', '饮食']
data = [4000, 2000, 6000, 1200]
fig, ax = plt.subplots()
ax.set_title('饼图')
# autopct prints each wedge's share with one decimal place.
ax.pie(data, labels=labels, autopct='%1.1f%%')
plt.show()
# Sample data: one [price, sales] pair per product.
data = [[12.2, 23.4], [14.5, 11.4], [15.8, 22.9]]
x = [item[0] for item in data]  # price
y = [item[1] for item in data]  # sales (second field, not the price again)
plt.title('散点图')
plt.scatter(x, y)
plt.xlabel('价格(元)')
plt.ylabel('销售(件)')
# Write each product name next to its own point.
for price, sales, product in zip(x, y, ['牙膏', '洗衣粉', '衣服']):
    plt.text(price, sales, product)
plt.show()
# Sample scores, including two low values (30).
data = [88, 78, 68, 79, 90, 89, 67, 76, 98, 30, 30]
fig, ax = plt.subplots()
ax.set_title('箱线图')
ax.boxplot(data)
plt.show()
# Polar demo: a radius/angle line, then a second line and a scatter, all
# requested on subplot slot 111 with a polar projection.
r = [1, 2, 3, 4, 5] # radii
theta = [0.0, 1.57, 3.14, 4.71, 6.28]  # angles in radians
ax = plt.subplot(111, projection='polar') # use polar axes for this subplot
plt.plot(theta, r) # draw the polar line
# Request the polar axes for slot 111 again.
# NOTE(review): presumably this returns the axes created above, so
# everything lands on one chart -- confirm for the Matplotlib version in use.
ax1 = plt.subplot(111, projection='polar')
# A single list is passed here, so the values are plotted against their index.
ax1.plot([1, 2, 3, 4, 5])
ax1.scatter([0.1, 0.2, 0.3, 0.4, 0.5], [0.6, 0.4, 0.2, 0.8, 0.3])
plt.title('极线图')
plt.show() # display the figure
# Sample data: one height value per year from 2005 to 2019.
year = range(2005, 2020)
height = [168, 155, 160, 143, 170, 160, 193, 170, 190, 160, 143, 170, 160, 193, 170]
fig, ax = plt.subplots()
ax.set_title('阶梯图')
ax.step(year, height)
plt.show()
# Bar positions, the class names shown on the ticks, and the scores.
x = [1, 2, 3]
name = ['一班', '二班', '三班']
y = [70, 90, 88]
plt.bar(x, y)
plt.title('成绩柱状图')
plt.xlabel('班级')
plt.ylabel('成绩')
plt.xticks(x, name)  # show the class names instead of 1, 2, 3
# Print every score just above its bar (the loop body must be indented).
for position, value in zip(x, y):
    plt.text(position, value + 1, value)
plt.show()
# Scores in three subjects (Chinese, maths, English), five values each.
ch = [72, 80, 66, 77, 92]
math = [62, 92, 72, 75, 88]
eng = [88, 76, 45, 80, 98]
positions = range(1, 6)
plt.title('堆积图')
plt.bar(positions, ch, color='r', label='语文成绩')
# bottom=ch stacks the maths bars on top of the Chinese bars.
plt.bar(positions, math, bottom=ch, color='g', label='数学成绩')
# English sits on top of the Chinese + maths total.
chmath = [c + m for c, m in zip(ch, math)]
plt.bar(positions, eng, bottom=chmath, color='b', label='英语成绩')
# The labels above are only displayed once a legend is requested.
plt.legend()
plt.show()
# Three score lists (c1, c2, c3), one value per subject.
c1 = [72, 80, 66]
c2 = [62, 92, 72]
c3 = [88, 76, 45]
name_list = ['语文', '数学', '英语']
width = 0.4  # bar width
# Each series gets its own x offsets so the three bars of a subject sit
# side by side: (x positions, heights, legend label, face colour).
series = [
    ([1, 3, 5], c1, 'c1', 'r'),
    ([1.4, 3.4, 5.4], c2, 'c2', 'g'),
    ([1.8, 3.8, 5.8], c3, 'c3', 'b'),
]
for x, heights, series_label, face in series:
    plt.bar(x, heights, label=series_label, fc=face, width=width)
# Put the subject names under the middle bar of each group.
x = [1.4, 3.4, 5.4]
plt.xticks(x, name_list)
plt.legend()  # colour key for c1 / c2 / c3
plt.title('分块图-三班级成绩图')
plt.xlabel('科目')
plt.ylabel('成绩')
plt.show()
# Fixed sample values: age (x), height (y) and weight (z).
x = [22, 23, 24, 25, 26, 27, 28, 29, 30]
y = [155, 150, 175, 180, 179, 190, 189, 170, 168]
z = [60, 66, 58, 76, 90, 89, 77, 88, 98]
# Bubble chart: a scatter whose marker size `s` is driven by z.
fig, ax = plt.subplots()
ax.scatter(x, y, s=z)
ax.set_title('气泡图')
plt.show()
一. 柱形图
# Libraries
import numpy as np
import matplotlib.pyplot as plt
# Five categories with one value each.
height = [3, 12, 5, 18, 45]
bars = ('A', 'B', 'C', 'D', 'E')
x_pos = np.arange(len(bars))
# One bar per category, with the category names as tick labels.
fig, ax = plt.subplots()
ax.bar(x_pos, height)
ax.set_xticks(x_pos)
ax.set_xticklabels(bars)
plt.show()
二. 三色柱形图
# libraries
import numpy as np
import matplotlib.pyplot as plt
# Width of a single bar; three bars form one group.
barWidth = 0.25
# Heights of the three series.
bars1 = [12, 30, 1, 8, 22]
bars2 = [28, 6, 16, 5, 10]
bars3 = [29, 3, 24, 25, 17]
# x positions: every series is shifted one bar width right of the previous.
r1 = np.arange(len(bars1))
r2 = [x + barWidth for x in r1]
r3 = [x + barWidth for x in r2]
# (positions, heights, colour, legend label) for each series.
series = (
    (r1, bars1, '#7f6d5f', 'var1'),
    (r2, bars2, '#557f2d', 'var2'),
    (r3, bars3, '#2d7f5e', 'var3'),
)
for positions, heights, colour, series_label in series:
    plt.bar(positions, heights, color=colour, width=barWidth, edgecolor='white', label=series_label)
# Group names go under the middle bar of each group.
plt.xlabel('group', fontweight='bold')
plt.xticks([r + barWidth for r in range(len(bars1))], ['A', 'B', 'C', 'D', 'E'])
plt.legend()
plt.show()
三. 叠加柱形图
# libraries
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
import pandas as pd
# Use a bold font weight for the figure text.
rc('font', weight='bold')
# Values of the three stacked layers.
bars1 = [12, 28, 1, 8, 22]
bars2 = [28, 7, 16, 4, 10]
bars3 = [25, 3, 23, 25, 17]
# Combined height of the two lower layers: the base of the top layer.
bars = [low + mid for low, mid in zip(bars1, bars2)]
# Bar positions, group names and bar width.
r = [0, 1, 2, 3, 4]
names = ['A', 'B', 'C', 'D', 'E']
barWidth = 1
# (heights, base, colour) from the bottom layer to the top layer; a base of
# None is the default and starts the layer at zero.
layers = (
    (bars1, None, '#7f6d5f'),
    (bars2, bars1, '#557f2d'),
    (bars3, bars, '#2d7f5e'),
)
for heights, base, colour in layers:
    plt.bar(r, heights, bottom=base, color=colour, edgecolor='white', width=barWidth)
# Group names on the x axis.
plt.xticks(r, names, fontweight='bold')
plt.xlabel("group")
plt.show()
四. 叠加百分比柱形图
# libraries
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
import pandas as pd
# Raw values of the three series for five groups.
r = [0, 1, 2, 3, 4]
raw_data = {'greenBars': [20, 1.5, 7, 10, 5], 'orangeBars': [5, 15, 5, 10, 15], 'blueBars': [2, 15, 18, 5, 10]}
df = pd.DataFrame(raw_data)
# Convert every value to its percentage of the group total.
totals = (df['greenBars'] + df['orangeBars'] + df['blueBars']).tolist()
greenBars, orangeBars, blueBars = (
    (df[column] / totals * 100).tolist()
    for column in ('greenBars', 'orangeBars', 'blueBars')
)
barWidth = 0.85
names = ('A', 'B', 'C', 'D', 'E')
# Stack green, then orange on green, then blue on green + orange.
green_plus_orange = [g + o for g, o in zip(greenBars, orangeBars)]
plt.bar(r, greenBars, color='#b5ffb9', edgecolor='white', width=barWidth)
plt.bar(r, orangeBars, bottom=greenBars, color='#f9bc86', edgecolor='white', width=barWidth)
plt.bar(r, blueBars, bottom=green_plus_orange, color='#a3acff', edgecolor='white', width=barWidth)
# Group names on the x axis.
plt.xticks(r, names)
plt.xlabel("group")
plt.show()
五. 水平直方图
# libraries & dataset
import seaborn as sns
import matplotlib.pyplot as plt
# Grey grid background (sns.set_theme() is the newer spelling on seaborn >= 0.11).
sns.set(style="darkgrid")
# Histogram of sepal length from the iris sample data; the variable is
# mapped to y, so the bars run horizontally.
df = sns.load_dataset("iris")
sns.histplot(df, y="sepal_length")
plt.show()
六. 紫色柱形图
# libraries
import numpy as np
import matplotlib.pyplot as plt
# Five categories with one value each.
height = [3, 12, 5, 18, 45]
bars = ('A', 'B', 'C', 'D', 'E')
x_pos = np.arange(len(bars))
# Bars in a translucent purple, given as an RGBA tuple.
fig, ax = plt.subplots()
ax.bar(x_pos, height, color=(0.5, 0.1, 0.5, 0.6))
# Title, axis names and category tick labels.
ax.set_title('My title')
ax.set_xlabel('categories')
ax.set_ylabel('values')
ax.set_xticks(x_pos)
ax.set_xticklabels(bars)
plt.show()
七.带条纹的条形图
# Libraries
import numpy as np
import matplotlib.pyplot as plt
# Four categories with one value each.
height = [2, 5, 4, 6]
bars = ('A', 'B', 'C', 'D')
x_pos = np.arange(len(bars))
# plt.bar returns a container of the individual bar patches.
figure = plt.bar(x_pos, height)
# One hatch pattern per bar.
hatches = ['-', '/', '||', '///']
# Assign the patterns bar by bar (the loop body must be indented).
for bar, hatch in zip(figure, hatches):
    bar.set_hatch(hatch)
# Category names on the x axis.
plt.xticks(x_pos, bars)
plt.show()
八、带标签的不同色条形图
# libraries
import numpy as np
import matplotlib.pyplot as plt
# Width of a single bar.
barWidth = 0.3
# Heights of the blue and the cyan bars.
bars1 = [10, 9, 2]
bars2 = [10.8, 9.5, 4.5]
# Error-bar lengths for bars1 and bars2.
yer1 = [0.5, 0.4, 0.5]
yer2 = [1, 0.7, 1]
# x positions: the cyan bars sit one bar width right of the blue ones.
r1 = np.arange(len(bars1))
r2 = [x + barWidth for x in r1]
# (positions, heights, colour, errors, legend label) per series.
series = (
    (r1, bars1, 'blue', yer1, 'poacee'),
    (r2, bars2, 'cyan', yer2, 'sorgho'),
)
for positions, heights, colour, errors, series_label in series:
    plt.bar(positions, heights, width=barWidth, color=colour, edgecolor='black', yerr=errors, capsize=7, label=series_label)
# Condition names, y label and legend.
plt.xticks([r + barWidth for r in range(len(bars1))], ['cond_A', 'cond_B', 'cond_C'])
plt.ylabel('height')
plt.legend()
plt.show()
九、详细的条形图
# library
import matplotlib.pyplot as plt
# Heights of the three series and their concatenation.
barWidth = 0.9
bars1 = [3, 3, 1]
bars2 = [4, 2, 3]
bars3 = [4, 6, 7, 10, 4, 4]
bars4 = bars1 + bars2 + bars3
# x positions of each series; r4 lists them in the same order as bars4.
r1 = [1, 5, 9]
r2 = [2, 6, 10]
r3 = [3, 4, 7, 8, 11, 12]
r4 = r1 + r2 + r3
# One colour and legend entry per series.
plt.bar(r1, bars1, width=barWidth, color=(0.3, 0.1, 0.4, 0.6), label='Alone')
plt.bar(r2, bars2, width=barWidth, color=(0.3, 0.5, 0.4, 0.6), label='With Himself')
plt.bar(r3, bars3, width=barWidth, color=(0.3, 0.9, 0.4, 0.6), label='With other genotype')
plt.legend()
# Bars are centred on their x position, so the tick for each bar goes on
# that position (1..12) rather than at an offset of one bar width.
tick_names = ['DD', 'with himself', 'with DC', 'with Silur', 'DC', 'with himself', 'with DD', 'with Silur', 'Silur', 'with himself', 'with DD', 'with DC']
plt.xticks(sorted(r4), tick_names, rotation=90)
# Sample-size captions, in the same order as r4 / bars4.
label = ['n = 6', 'n = 25', 'n = 13', 'n = 36', 'n = 30', 'n = 11', 'n = 16', 'n = 37', 'n = 14', 'n = 4', 'n = 31', 'n = 34']
# Write each caption just above its bar (the loop body must be indented).
for bar_x, bar_top, caption in zip(r4, bars4, label):
    plt.text(x=bar_x - 0.5, y=bar_top + 0.1, s=caption, size=6)
# Leave room at the bottom for the rotated tick labels.
plt.subplots_adjust(bottom=0.2, top=0.98)
plt.show()
十、色块
# library
import seaborn as sns
import pandas as pd
import numpy as np
# pyplot is needed to display the figure when this runs as a script.
import matplotlib.pyplot as plt
# 5x5 table of uniform random numbers with named columns.
df = pd.DataFrame(np.random.random((5, 5)), columns=["a", "b", "c", "d", "e"])
# Default heatmap: a colour-coded view of the table.
sns.heatmap(df)
# Show the figure now; otherwise it is never displayed and later plotting
# calls would draw onto the same axes.
plt.show()
十一、带数值的色块
# libraries
import seaborn as sns
import pandas as pd
import numpy as np
# pyplot is needed to display the figure when this runs as a script.
import matplotlib.pyplot as plt
# 10x10 table of uniform random numbers with named columns.
df = pd.DataFrame(np.random.random((10, 10)), columns=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"])
# Heatmap with every cell's value written inside it in a small font.
sns.heatmap(df, annot=True, annot_kws={"size": 7})
# Show the figure now; otherwise it is never displayed and later plotting
# calls would draw onto the same axes.
plt.show()
十二、散点图
# libraries
import pandas as pd
import numpy as np
# Import pyplot directly: `matplotlib.pylab` is a discouraged legacy
# namespace and would rebind `plt` to it for the rest of the script.
import matplotlib.pyplot as plt
import seaborn as sns
# 100 points with uniformly random x and y.
df = pd.DataFrame(np.random.random((100, 2)), columns=["x", "y"])
# Colour column: purple where x > 0.2 and y > 0.4, blue elsewhere. The mask
# is already boolean, so it is used as-is rather than compared to True.
value = (df['x'] > 0.2) & (df['y'] > 0.4)
df['color'] = np.where(value, "#9b59b6", "#3498db")
# Scatter only (no regression line), with the per-point face colours.
sns.regplot(data=df, x="x", y="y", fit_reg=False, scatter_kws={'facecolors': df['color']})
plt.show()
十三、颜色渐变的散点图
# Libraries
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Random data: x in [-0.5, 0.5), y and z are x plus uniform noise.
x = np.random.rand(80) - 0.5
y = x + np.random.rand(80)
z = x + np.random.rand(80)
df = pd.DataFrame({'x': x, 'y': y, 'z': z})
# Same scatter twice, coloured by x: first with the "Blues" palette, then
# with its reversed variant.
for palette_name in ("Blues", "Blues_r"):
    sns.lmplot(x='x', y='y', data=df, fit_reg=False, hue='x', legend=False, palette=palette_name)
    plt.show()
十四、箱线图(seaborn 主题)
# libraries
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
# Six columns of 20 normal samples; column k is shifted up by k / 2.
data = np.random.normal(size=(20, 6)) + np.arange(6) / 2
# Draw the same box plot once per seaborn theme, titled with the theme name.
for theme in ("whitegrid", "darkgrid", "white", "dark", "ticks"):
    sns.set_style(theme)
    sns.boxplot(data=data)
    plt.title(theme)
    plt.show()
十五、带点折线图
# library and dataset
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
# x runs 1..100; y and z are noisy ramps built on the same 1..100 values.
ramp = range(1, 101)
df = pd.DataFrame({
    'x_axis': ramp,
    'y_axis': np.random.randn(100) * 15 + ramp,
    'z': (np.random.randn(100) * 15 + ramp) * 2,
})
# Line with a round marker on every point; columns are picked by name.
fig, ax = plt.subplots()
ax.plot('x_axis', 'y_axis', data=df, marker='o', color='mediumvioletred')
plt.show()
十六、对比折线图
# Libraries and data
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Ten random values against x = 1..10.
df = pd.DataFrame({'x_values': range(1, 11), 'y_values': np.random.randn(10)})
# The same line twice: first with default opacity, then at 30 % opacity.
for extra_style in ({}, {'alpha': 0.3}):
    plt.plot('x_values', 'y_values', data=df, color='skyblue', **extra_style)
    plt.show()
十七、虚线折线图
# Libraries and data
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Ten random values against x = 1..10.
df = pd.DataFrame({'x_values': range(1, 11), 'y_values': np.random.randn(10)})
# Line chart drawn with a dashed line style.
fig, ax = plt.subplots()
ax.plot('x_values', 'y_values', data=df, linestyle='dashed')
plt.show()
十八、多种颜色的折线图
# libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Ten noisy series y1..y10 over x = 1..10.
df = pd.DataFrame({
    'x': range(1, 11),
    'y1': np.random.randn(10),
    'y2': np.random.randn(10) + range(1, 11),
    'y3': np.random.randn(10) + range(11, 21),
    'y4': np.random.randn(10) + range(6, 16),
    'y5': np.random.randn(10) + range(4, 14) + (0, 0, 0, 0, 0, 0, 0, -3, -8, -6),
    'y6': np.random.randn(10) + range(2, 12),
    'y7': np.random.randn(10) + range(5, 15),
    'y8': np.random.randn(10) + range(4, 14),
    'y9': np.random.randn(10) + range(4, 14),
    'y10': np.random.randn(10) + range(2, 12),
})
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# later releases dropped the old names, so use whichever name is installed.
style_name = next(
    (s for s in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid') if s in plt.style.available),
    'default',
)
plt.style.use(style_name)
# Colour map used to give each line its own colour.
palette = plt.get_cmap('Set1')
# One line per y column; num counts from 1 (the loop body must be indented).
for num, column in enumerate(df.drop('x', axis=1), start=1):
    plt.plot(df['x'], df[column], marker='', color=palette(num), linewidth=1, alpha=0.9, label=column)
# Legend in the upper-left corner, in two columns.
plt.legend(loc=2, ncol=2)
plt.title("A (bad) Spaghetti plot", loc='left', fontsize=12, fontweight=0, color='orange')
plt.xlabel("Time")
plt.ylabel("Score")
plt.show()
十九、几种线
# One wavy sample line per built-in line style, each captioned with the
# style string: (y values, line style, caption y position, caption colour).
samples = (
    ([1, 1.1, 1, 1.1, 1], '-', 1.3, 'C0'),
    ([2, 2.1, 2, 2.1, 2], '--', 2.3, 'C1'),
    ([3, 3.1, 3, 3.1, 3], '-.', 3.3, 'C2'),
    ([4, 4.1, 4, 4.1, 4], ':', 4.3, 'C3'),
)
for levels, dash, caption_y, caption_color in samples:
    plt.plot(levels, linestyle=dash, linewidth=4)
    plt.text(1.5, caption_y, "linestyle = '" + dash + "' ", horizontalalignment='left', size='medium', color=caption_color, weight='semibold')
# Hide the axes so only the lines and captions remain.
plt.axis('off')
plt.show()
二十、几种折线
# libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Three noisy series over x = 1..10.
df = pd.DataFrame({
    'x_values': range(1, 11),
    'y1_values': np.random.randn(10),
    'y2_values': np.random.randn(10) + range(1, 11),
    'y3_values': np.random.randn(10) + range(11, 21),
})
# Three differently styled lines on one set of axes; columns are picked by name.
fig, ax = plt.subplots()
ax.plot('x_values', 'y1_values', data=df, marker='o', markerfacecolor='blue', markersize=12, color='skyblue', linewidth=4)
ax.plot('x_values', 'y2_values', data=df, marker='', color='olive', linewidth=2)
ax.plot('x_values', 'y3_values', data=df, marker='', color='olive', linewidth=2, linestyle='dashed', label="toto")
ax.legend()
plt.show()
二十一、小倍数线图
# libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Nine noisy series y1..y9 over x = 1..10.
df = pd.DataFrame({
    'x': range(1, 11),
    'y1': np.random.randn(10),
    'y2': np.random.randn(10) + range(1, 11),
    'y3': np.random.randn(10) + range(11, 21),
    'y4': np.random.randn(10) + range(6, 16),
    'y5': np.random.randn(10) + range(4, 14) + (0, 0, 0, 0, 0, 0, 0, -3, -8, -6),
    'y6': np.random.randn(10) + range(2, 12),
    'y7': np.random.randn(10) + range(5, 15),
    'y8': np.random.randn(10) + range(4, 14),
    'y9': np.random.randn(10) + range(4, 14),
})
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# later releases dropped the old names, so use whichever name is installed.
style_name = next(
    (s for s in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid') if s in plt.style.available),
    'default',
)
plt.style.use(style_name)
palette = plt.get_cmap('Set1')
# One panel per series on a 3x3 grid; num is the 1-based panel number.
for num, column in enumerate(df.drop('x', axis=1), start=1):
    plt.subplot(3, 3, num)
    plt.plot(df['x'], df[column], marker='', color=palette(num), linewidth=1.9, alpha=0.9, label=column)
    # Same limits in every panel so the panels are comparable.
    plt.xlim(0, 10)
    plt.ylim(-2, 22)
    # Keep x tick labels on the bottom row and y tick labels on the left
    # column only. The flags must be booleans: the string 'off' is truthy
    # and would leave the labels visible.
    if num in range(7):
        plt.tick_params(labelbottom=False)
    if num not in [1, 4, 7]:
        plt.tick_params(labelleft=False)
    plt.title(column, loc='left', fontsize=12, fontweight=0, color=palette(num))
# Title for the whole figure.
plt.suptitle("How the 9 students improved\nthese past few days?", fontsize=13, fontweight=0, color='black', style='italic', y=1.02)
# Shared axis captions, placed in figure coordinates (0..1) rather than in
# the data coordinates of the last panel.
figure = plt.gcf()
figure.text(0.5, 0.02, 'Time', ha='center', va='center')
figure.text(0.06, 0.5, 'Note', ha='center', va='center', rotation='vertical')
plt.show()
二十二、基本连通散点图
# Libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Default size for figures created from here on.
plt.rcParams["figure.figsize"] = (10, 6)
# Seeded generator so the random points are reproducible.
rng = np.random.default_rng(1111)
# Ten x positions; y is random, z is y plus a random positive offset.
x = np.arange(10)
y = rng.integers(10, 100, 10)
z = y + rng.integers(5, 20, 10)
# Two marker-connected lines on the same axes.
fig, ax = plt.subplots()
for series, series_label in ((z, "Income"), (y, "Expenses")):
    ax.plot(x, series, linestyle="-", marker="o", label=series_label)
ax.legend()
plt.show()
二十三、如何使用 Python 避免过度绘制
# libraries
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
# Three groups of 20,000 normally distributed points with different centres.
df = pd.DataFrame({'x': np.random.normal(10, 1.2, 20000), 'y': np.random.normal(10, 1.2, 20000), 'group': np.repeat('A', 20000)})
tmp1 = pd.DataFrame({'x': np.random.normal(14.5, 1.2, 20000), 'y': np.random.normal(14.5, 1.2, 20000), 'group': np.repeat('B', 20000)})
tmp2 = pd.DataFrame({'x': np.random.normal(9.5, 1.5, 20000), 'y': np.random.normal(15.5, 1.5, 20000), 'group': np.repeat('C', 20000)})
# DataFrame.append was removed in pandas 2.0; concat stacks the frames the
# same way and keeps each frame's own index.
df = pd.concat([df, tmp1, tmp2])
# Markers only, no connecting line: with this many points they overlap heavily.
plt.plot('x', 'y', "", data=df, linestyle='', marker='o')
plt.xlabel('Value of X')
plt.ylabel('Value of Y')
plt.title('Overplotting looks like that:', loc='left')
plt.show()
二十四、基本甜甜圈
# library
import matplotlib.pyplot as plt
# Wedge sizes.
size_of_groups = [12, 11, 3, 30]
# Draw the pie, then cover its centre with a white disc to form the ring.
p, donut_ax = plt.subplots()
donut_ax.pie(size_of_groups)
my_circle = plt.Circle((0, 0), 0.7, color='white')
donut_ax.add_artist(my_circle)
plt.show()
二十五、自定义甜甜圈
# library
import matplotlib.pyplot as plt
# Wedge names and sizes.
names = ['groupA', 'groupB', 'groupC', 'groupD']
size = [12, 11, 3, 30]
# Pie with one named colour per wedge.
p, donut_ax = plt.subplots()
donut_ax.pie(size, labels=names, colors=['red', 'green', 'blue', 'skyblue'])
# A white disc over the centre turns the pie into a ring.
my_circle = plt.Circle((0, 0), 0.7, color='white')
donut_ax.add_artist(my_circle)
plt.show()
# library
import matplotlib.pyplot as plt
# Wedge names and sizes.
names = ['groupA', 'groupB', 'groupC', 'groupD']
size = [12, 11, 3, 30]
# Only two colours for four wedges: the colour list is reused in a cycle.
p, donut_ax = plt.subplots()
donut_ax.pie(size, labels=names, colors=['red', 'green'])
# A white disc over the centre turns the pie into a ring.
my_circle = plt.Circle((0, 0), 0.7, color='white')
donut_ax.add_artist(my_circle)
plt.show()
二十六、改变背景的甜甜圈
# library
import matplotlib.pyplot as plt
# Wedge names and sizes.
names = 'groupA', 'groupB', 'groupC', 'groupD',
size = [12, 11, 3, 30]
# White text is wanted for this figure only. rc_context restores the
# previous text colour on exit, so later figures do not end up with white
# text on a white background.
with plt.rc_context({'text.color': 'white'}):
    # Figure with a black background.
    fig = plt.figure()
    fig.patch.set_facecolor('black')
    # A black disc over the centre of the pie forms the ring.
    my_circle = plt.Circle((0, 0), 0.7, color='black')
    plt.pie(size, labels=names)
    p = plt.gcf()
    p.gca().add_artist(my_circle)
    plt.show()
二十七、分组甜甜圈
# Libraries
import matplotlib.pyplot as plt
# Three groups and the subgroups they are split into.
group_names = ['groupA', 'groupB', 'groupC']
group_size = [12, 11, 30]
subgroup_names = ['A.1', 'A.2', 'A.3', 'B.1', 'B.2', 'C.1', 'C.2', 'C.3', 'C.4', 'C.5']
subgroup_size = [4, 3, 5, 6, 5, 10, 5, 5, 4, 6]
# One colour map per group; subgroups use shades from their group's map.
a, b, c = plt.cm.Blues, plt.cm.Reds, plt.cm.Greens
outer_colors = [a(0.6), b(0.6), c(0.6)]
inner_colors = [a(0.5), a(0.4), a(0.3), b(0.5), b(0.4), c(0.6), c(0.5), c(0.4), c(0.3), c(0.2)]
fig, ax = plt.subplots()
ax.axis('equal')
# Outer ring: the groups.
mypie, _ = ax.pie(group_size, radius=1.3, labels=group_names, colors=outer_colors)
plt.setp(mypie, width=0.3, edgecolor='white')
# Inner ring: the subgroups, with their labels drawn inside the ring.
mypie2, _ = ax.pie(subgroup_size, radius=1.3 - 0.3, labels=subgroup_names, labeldistance=0.7, colors=inner_colors)
plt.setp(mypie2, width=0.4, edgecolor='white')
plt.margins(0, 0)
plt.show()
二十八、棒棒糖图
# libraries
import matplotlib.pyplot as plt
import numpy as np
# Forty uniform random values at x = 1..40.
x = range(1, 41)
values = np.random.uniform(size=40)
# Stem plot with explicit x positions and a fixed y range.
fig, ax = plt.subplots()
ax.stem(x, values)
ax.set_ylim(0, 1.2)
plt.show()
# Stem plot without x: positions are generated automatically.
fig, ax = plt.subplots()
ax.stem(values)
plt.show()
二十九、自定义棒棒糖图
# libraries
import matplotlib.pyplot as plt
import numpy as np
# Forty uniform random values.
values = np.random.uniform(size=40)
# Stems only: a blank marker format hides the markers.
plt.stem(values, markerfmt=' ')
plt.show()
# Restyle the markers of a second stem plot: orange-edged diamonds.
markers, stemlines, baseline = plt.stem(values)
markers.set(marker='D', markersize=10, markeredgecolor="orange", markeredgewidth=2)
plt.show()
三十、垂直棒棒糖图
# libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Twenty groups named 'A'..'T', each with a uniform random value.
df = pd.DataFrame({'group': [chr(code) for code in range(65, 85)], 'values': np.random.uniform(size=20)})
# Sort by value so the lollipops are drawn in increasing order.
ordered_df = df.sort_values(by='values')
my_range = range(1, len(df) + 1)
# One horizontal stick from 0 to the value, with a dot at its end.
fig, ax = plt.subplots()
ax.hlines(y=my_range, xmin=0, xmax=ordered_df['values'], color='skyblue')
ax.plot(ordered_df['values'], my_range, "o")
# Group names on the y ticks, plus title and axis names.
ax.set_yticks(my_range)
ax.set_yticklabels(ordered_df['group'])
ax.set_title("A vertical lolipop plot", loc='left')
ax.set_xlabel('Value of the variable')
ax.set_ylabel('Group')
plt.show()
三十一、高光棒棒糖图
# libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Twenty groups named 'A'..'T', each with a uniform random value.
df = pd.DataFrame({'group': [chr(code) for code in range(65, 85)], 'values': np.random.uniform(size=20)})
# Sort by value so the lollipops are drawn in increasing order.
ordered_df = df.sort_values(by='values')
my_range = range(1, len(df) + 1)
# Group "B" is highlighted: orange and larger, everything else sky blue.
is_b = ordered_df['group'] == 'B'
my_color = np.where(is_b, 'orange', 'skyblue')
my_size = np.where(is_b, 70, 30)
# Faded horizontal sticks with opaque dots at their ends.
fig, ax = plt.subplots()
ax.hlines(y=my_range, xmin=0, xmax=ordered_df['values'], color=my_color, alpha=0.4)
ax.scatter(ordered_df['values'], my_range, color=my_color, s=my_size, alpha=1)
# Group names on the y ticks, plus title and axis names.
ax.set_yticks(my_range)
ax.set_yticklabels(ordered_df['group'])
ax.set_title("What about the B group?", loc='left')
ax.set_xlabel('Value of the variable')
ax.set_ylabel('Group')
plt.show()
三十二、分组棒棒糖图
# libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Two values per group: value2 is value1 plus a small random increment.
value1 = np.random.uniform(size=20)
value2 = value1 + np.random.uniform(size=20) / 4
df = pd.DataFrame({'group': [chr(code) for code in range(65, 85)], 'value1': value1, 'value2': value2})
# Sort by the first value.
ordered_df = df.sort_values(by='value1')
my_range = range(1, len(df) + 1)
# A grey segment joins the two values of each group; dots mark both ends.
fig, ax = plt.subplots()
ax.hlines(y=my_range, xmin=ordered_df['value1'], xmax=ordered_df['value2'], color='grey', alpha=0.4)
for column, colour, opacity in (('value1', 'skyblue', 1), ('value2', 'green', 0.4)):
    ax.scatter(ordered_df[column], my_range, color=colour, alpha=opacity, label=column)
ax.legend()
# Group names on the y ticks, plus title and axis names.
ax.set_yticks(my_range)
ax.set_yticklabels(ordered_df['group'])
ax.set_title("Comparison of the value 1 and the value 2", loc='left')
ax.set_xlabel('Value of the variables')
ax.set_ylabel('Group')
plt.show()
三十三、带条件色彩的棒棒糖图
#%%
# libraries
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
# A noisy sine wave sampled at 100 points over one period.
x = np.linspace(0, 2 * np.pi, 100)
y = np.sin(x) + np.random.uniform(size=len(x)) - 0.2
# Orange where y is zero or above, sky blue below zero.
my_color = np.where(y >= 0, 'orange', 'skyblue')
# Faded vertical sticks from 0 to y, with a small opaque dot at each end.
fig, ax = plt.subplots()
ax.vlines(x=x, ymin=0, ymax=y, color=my_color, alpha=0.4)
ax.scatter(x, y, color=my_color, s=1, alpha=1)
# Title and axis names.
ax.set_title("Evolution of the value of ...", loc='left')
ax.set_xlabel('Value of the variable')
ax.set_ylabel('Group')
plt.show()
三十四、面积图
import numpy as np
import matplotlib.pyplot as plt
# Five sample points.
x = range(1, 6)
y = [1, 4, 6, 8, 4]
# Area chart: fill the region under y.
# (plt.stackplot(x, y) would also work; fill_between is easier to
# customise later.)
fig, ax = plt.subplots()
ax.fill_between(x, y)
plt.show()
三十五、改善区域图
# libraries
import numpy as np
import matplotlib.pyplot as plt
# Fourteen sample points.
x = range(1, 15)
y = [1, 4, 6, 8, 4, 5, 3, 2, 4, 1, 5, 6, 8, 7]
# First figure: a translucent sky-blue area.
fig, ax = plt.subplots()
ax.fill_between(x, y, color="skyblue", alpha=0.4)
plt.show()
# Second figure: a fainter area with a stronger line along its top edge.
fig, ax = plt.subplots()
ax.fill_between(x, y, color="skyblue", alpha=0.2)
ax.plot(x, y, color="Slateblue", alpha=0.6)
plt.show()
三十六、区域图表和分面
# libraries
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
# Sixteen country names, each with ten yearly random values (2000-2009).
my_count = ["France", "Australia", "Japan", "USA", "Germany", "Congo", "China", "England", "Spain", "Greece", "Marocco", "South Africa", "Indonesia", "Peru", "Chili", "Brazil"]
df = pd.DataFrame({
    "country": np.repeat(my_count, 10),
    "years": list(range(2000, 2010)) * 16,
    "value": np.random.rand(160),
})
# One facet per country, four facets per row, coloured by country.
grid = sns.FacetGrid(df, col='country', hue='country', col_wrap=4)
# Each facet gets the line, then the translucent area under it.
grid.map(plt.plot, 'years', 'value')
grid.map(plt.fill_between, 'years', 'value', alpha=0.2)
# Titles are set twice; the second call is the one that remains.
grid.set_titles("{col_name} country")
grid.set_titles("{col_name}")
# Leave room at the top for the overall title.
plt.subplots_adjust(top=0.92)
g = grid.fig.suptitle('Evolution of the value of stuff in 16 countries')
plt.show()
三十七、白色网格区域图
# libraries
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# White background with grid lines.
sns.set_style("whitegrid")
# First colour of the "muted" palette.
blue = sns.color_palette("muted", 1)[0]
# 23 random integers in [8, 20).
x = np.arange(23)
y = np.random.randint(8, 20, 23)
# A thick line with a translucent fill down to zero.
fig, ax = plt.subplots()
ax.plot(x, y, color=blue, lw=3)
ax.fill_between(x, 0, y, alpha=.3)
# x spans exactly the data, y starts at zero, one tick per point.
ax.set_xlim(0, len(x) - 1)
ax.set_ylim(0, None)
ax.set_xticks(x)
plt.show()
三十八、基本堆叠区域图
# libraries
import numpy as np
import matplotlib.pyplot as plt
# Format 1: the layers are passed as one nested list.
x = range(1, 6)
y = [[1, 4, 6, 8, 9], [2, 2, 7, 10, 12], [2, 8, 5, 10, 6]]
fig, ax = plt.subplots()
ax.stackplot(x, y, labels=['A', 'B', 'C'])
ax.legend(loc='upper left')
plt.show()
# Format 2: the same layers passed as separate arguments.
x = range(1, 6)
y1 = [1, 4, 6, 8, 9]
y2 = [2, 2, 7, 10, 12]
y3 = [2, 8, 5, 10, 6]
fig, ax = plt.subplots()
ax.stackplot(x, y1, y2, y3, labels=['A', 'B', 'C'])
ax.legend(loc='upper left')
plt.show()
三十九、海洋风格的堆叠区域图
# libraries
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme to the figures that follow.
sns.set_theme()
# Three layers over x = 1..5.
x = range(1, 6)
y = [[1, 4, 6, 8, 9], [2, 2, 7, 10, 12], [2, 8, 5, 10, 6]]
# Stacked area chart with a legend.
fig, ax = plt.subplots()
ax.stackplot(x, y, labels=['A', 'B', 'C'])
ax.legend(loc='upper left')
plt.show()
四十、基线选项堆叠面积图
# libraries
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Five random layers over x = 0..9, each an upward ramp plus noise.
X = np.arange(0, 10, 1)
Y = X + 5 * np.random.random((5, X.size))
# The four baseline modes stackplot supports.
baseline = ["zero", "sym", "wiggle", "weighted_wiggle"]
# One panel per baseline on a 2x2 grid.
for n, v in enumerate(baseline):
    plt.subplot(2, 2, n + 1)
    # Hide the x tick labels on the top row. This is done after the panel
    # exists and with a real boolean: the string 'off' is truthy and would
    # leave the labels visible.
    if n < 2:
        plt.tick_params(labelbottom=False)
    plt.stackplot(X, *Y, baseline=v)
    plt.title(v)
plt.tight_layout()
# Display the figure; without this a script run shows nothing.
plt.show()
01. 小提琴图
小提琴图可以将一组或多组数据的数值变量分布可视化。
与有时会隐藏数据特征的箱形图相比,小提琴图值得更多关注。
import seaborn as sns
import matplotlib.pyplot as plt
# Load the iris sample data (cached under ./seaborn-data).
df = sns.load_dataset('iris', data_home='seaborn-data', cache=True)
# One violin of sepal length per species.
sns.violinplot(data=df, x="species", y="sepal_length")
plt.show()
02. 核密度估计图
核密度估计图其实是对直方图的一个自然拓展。
可以可视化一个或多个组的数值变量的分布,非常适合大型数据集。
import seaborn as sns
import matplotlib.pyplot as plt
# Load the iris sample data (cached under ./seaborn-data).
df = sns.load_dataset('iris', data_home='seaborn-data', cache=True)
# Kernel density estimate of the sepal width.
sns.kdeplot(data=df, x='sepal_width')
plt.show()
03. 直方图
直方图,可视化一组或多组数据的分布情况。
import seaborn as sns
import matplotlib.pyplot as plt
# Load the iris sample data (cached under ./seaborn-data).
df = sns.load_dataset('iris', data_home='seaborn-data', cache=True)
# Plain histogram of the sepal length (no density curve, no rug).
# histplot replaces the deprecated distplot; bin choice and bar opacity
# follow histplot's defaults.
sns.histplot(data=df, x="sepal_length", kde=False)
plt.show()
04. 箱形图
箱形图,可视化一组或多组数据的分布情况。
可以快速获得中位数、四分位数和异常值,但也隐藏数据集的各个数据点。
import seaborn as sns
import matplotlib.pyplot as plt
# Load the iris sample data (cached under ./seaborn-data).
df = sns.load_dataset('iris', data_home='seaborn-data', cache=True)
# One box of sepal length per species.
sns.boxplot(data=df, x="species", y="sepal_length")
plt.show()
06. 散点图
散点图,显示2个数值变量之间的关系。
import seaborn as sns
import matplotlib.pyplot as plt
# Load the iris sample data (cached under ./seaborn-data).
df = sns.load_dataset('iris', data_home='seaborn-data', cache=True)
# Scatter of sepal width against sepal length with a fitted regression line.
sns.regplot(data=df, x="sepal_length", y="sepal_width")
plt.show()
08. 相关性图
相关性图或相关矩阵图,分析每对数据变量之间的关系。
相关性可视化为散点图,对角线用直方图或密度图表示每个变量的分布。
import seaborn as sns
import matplotlib.pyplot as plt
# 加载数据
df = sns.load_dataset('iris', data_home='seaborn-data', cache=True)
# 绘图显示
sns.pairplot(df)
plt.show()
10. 连接散点图
连接散点图就是一个线图,其中每个数据点由圆形或任何类型的标记展示。
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# 创建数据
df = pd.DataFrame({'x_axis': range(1, 10), 'y_axis': np.random.randn(9) * 80 + range(1, 10)})
# 绘制显示
plt.plot('x_axis', 'y_axis', data=df, linestyle='-', marker='o')
plt.show()
11. 二维密度图
二维密度图或二维直方图,可视化两个定量变量的组合分布。
它们总是在X轴上表示一个变量,另一个在Y轴上,就像散点图。
然后计算二维空间特定区域内的次数,并用颜色渐变表示。
形状变化:六边形(hexbin chart)、正方形(2D histogram)、核密度(2D density plot 或 contour plot)。
import numpy as np
import matplotlib.pyplot as plt
# gaussian_kde is imported from scipy.stats itself; the scipy.stats.kde
# submodule used previously is deprecated
from scipy.stats import gaussian_kde
# Create data: 200 points drawn from a 2D normal distribution
data = np.random.multivariate_normal([0, 0], [[1, 0.5], [0.5, 3]], 200)
x, y = data.T
# Create the canvas with 6 subplots in a single row
fig, axes = plt.subplots(ncols=6, nrows=1, figsize=(21, 5))
# Subplot 1: scatter plot
axes[0].set_title('Scatterplot')
axes[0].plot(x, y, 'ko')
# Subplot 2: hexagonal bins
nbins = 20
axes[1].set_title('Hexbin')
axes[1].hexbin(x, y, gridsize=nbins, cmap=plt.cm.BuGn_r)
# Subplot 3: 2D histogram
axes[2].set_title('2D Histogram')
axes[2].hist2d(x, y, bins=nbins, cmap=plt.cm.BuGn_r)
# Gaussian KDE evaluated on an nbins x nbins grid spanning the data range
k = gaussian_kde(data.T)
xi, yi = np.mgrid[x.min():x.max():nbins * 1j, y.min():y.max():nbins * 1j]
zi = k(np.vstack([xi.flatten(), yi.flatten()]))
# Reshape the flat density to the grid once; the last three subplots reuse it
density = zi.reshape(xi.shape)
# Subplot 4: density map
axes[3].set_title('Calculate Gaussian KDE')
axes[3].pcolormesh(xi, yi, density, shading='auto', cmap=plt.cm.BuGn_r)
# Subplot 5: density map with Gouraud shading
axes[4].set_title('2D Density with shading')
axes[4].pcolormesh(xi, yi, density, shading='gouraud', cmap=plt.cm.BuGn_r)
# Subplot 6: shaded density map with contour lines on top
axes[5].set_title('Contour')
axes[5].pcolormesh(xi, yi, density, shading='gouraud', cmap=plt.cm.BuGn_r)
axes[5].contour(xi, yi, density)
plt.show()
12. 条形图
条形图表示多个明确的变量的数值关系。每个变量都为一个条形。条形的大小代表其数值。
import numpy as np
import matplotlib.pyplot as plt
# 生成随机数据
height = [3, 12, 5, 18, 45]
bars = ('A', 'B', 'C', 'D', 'E')
y_pos = np.arange(len(bars))
# 创建条形图
plt.bar(y_pos, height)
# x轴标签
plt.xticks(y_pos, bars)
# 显示
plt.show()
13. 雷达图
雷达图,可以可视化多个定量变量的一个或多个系列的值。
每个变量都有自己的轴,所有轴都连接在图形的中心。
import matplotlib.pyplot as plt
import pandas as pd
from math import pi
# Data: one row per group, one column per variable
df = pd.DataFrame({
    'group': ['A', 'B', 'C', 'D'],
    'var1': [38, 1.5, 30, 4],
    'var2': [29, 10, 9, 34],
    'var3': [8, 39, 23, 24],
    'var4': [7, 31, 33, 14],
    'var5': [28, 15, 32, 14]
})
# Every column except 'group' becomes one radar axis
categories = list(df)[1:]
N = len(categories)
# Angle of each axis; the first angle is repeated to close the polygon
angles = [n / float(N) * 2 * pi for n in range(N)]
angles.append(angles[0])
# Polar axes with the first axis at the top, going clockwise
ax = plt.subplot(111, polar=True)
ax.set_theta_offset(pi / 2)
ax.set_theta_direction(-1)
# Axis labels and radial grid
plt.xticks(angles[:-1], categories)
ax.set_rlabel_position(0)
plt.yticks([10, 20, 30], ["10", "20", "30"], color="grey", size=7)
plt.ylim(0, 40)
# Draw the first two groups: (row index, legend label, fill colour)
for radar_row, radar_label, radar_fill in ((0, "group A", 'b'), (1, "group B", 'r')):
    row_values = df.loc[radar_row].drop('group').values.flatten().tolist()
    # Repeat the first value so the outline closes on itself
    values = row_values + row_values[:1]
    ax.plot(angles, values, linewidth=1, linestyle='solid', label=radar_label)
    ax.fill(angles, values, radar_fill, alpha=0.1)
# Legend
plt.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))
# Display
plt.show()
14. 词云图
词云图是文本数据的视觉表示。
单词通常是单个的,每个单词的重要性以字体大小或颜色表示。
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# 添加词语
text=("Python Python Python Matplotlib Chart Wordcloud Boxplot")
# 创建词云对象
wordcloud = WordCloud(width=480, height=480, margin=0).generate(text)
# 显示词云图
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.margins(x=0, y=0)
plt.show()
15. 平行坐标图
平行坐标图能够比较不同系列相同属性的数值情况。
Pandas可能是绘制平行坐标图的最佳方式。
import seaborn as sns
import matplotlib.pyplot as plt
from pandas.plotting import parallel_coordinates
# 读取数据
data = sns.load_dataset('iris', data_home='seaborn-data', cache=True)
# 创建图表
parallel_coordinates(data, 'species', colormap=plt.get_cmap("Set2"))
# 显示
plt.show()
17. 径向柱图
径向柱图同样也是条形图的变形,但是使用极坐标而不是直角坐标系。
绘制起来有点麻烦,而且比柱状图准确度低,但更引人注目。
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Generate data: 50 named items with random integer values
df = pd.DataFrame(
    {
        'Name': ['item ' + str(i) for i in range(1, 51)],
        'Value': np.random.randint(low=10, high=100, size=50)
    })
# Sort by value so the bars grow around the circle
df = df.sort_values(by=['Value'])
# Initialise the canvas with a polar axis and hide the axis decorations
plt.figure(figsize=(20, 10))
ax = plt.subplot(111, polar=True)
plt.axis('off')
# Chart parameters
upperLimit = 100
lowerLimit = 30
labelPadding = 4
# Largest value in the data (named max_value so the builtin max() stays usable)
max_value = df['Value'].max()
# Rescale the values into bar heights
slope = (max_value - lowerLimit) / max_value
heights = slope * df.Value + lowerLimit
# Angular width of one bar
width = 2 * np.pi / len(df.index)
# Angle at which each bar is drawn
indexes = list(range(1, len(df.index) + 1))
angles = [element * width for element in indexes]
# Draw the bars, starting at the inner radius lowerLimit
bars = ax.bar(
    x=angles,
    height=heights,
    width=width,
    bottom=lowerLimit,
    linewidth=2,
    edgecolor="white",
    color="#61a4b2",
)
# Add one label per bar
for bar, angle, label in zip(bars, angles, df["Name"]):
    # Rotate the label along the direction of its bar
    rotation = np.rad2deg(angle)
    # Flip labels on the left half of the circle so they stay readable
    if np.pi / 2 <= angle < 3 * np.pi / 2:
        alignment = "right"
        rotation = rotation + 180
    else:
        alignment = "left"
    # Place the label just beyond the outer end of the bar
    ax.text(
        x=angle,
        y=lowerLimit + bar.get_height() + labelPadding,
        s=label,
        ha=alignment,
        va='center',
        rotation=rotation,
        rotation_mode="anchor")
plt.show()
19. 维恩图
维恩图,显示不同组之间所有可能的关系。
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
# 创建图表
venn2(subsets=(10, 5, 2), set_labels=('Group A', 'Group B'))
# 显示
plt.show()
20. 圆环图
圆环图,本质上就是一个饼图,中间切掉了一个区域。
import matplotlib.pyplot as plt
# 创建数据
size_of_groups = [12, 11, 3, 30]
# 生成饼图
plt.pie(size_of_groups)
# 在中心添加一个圆, 生成环形图
my_circle = plt.Circle((0, 0), 0.7, color='white')
p = plt.gcf()
p.gca().add_artist(my_circle)
plt.show()
21. 饼图
饼图,最常见的可视化图表之一。
将圆划分成一个个扇形区域,每个区域代表在整体中所占的比例。
import matplotlib.pyplot as plt
# 创建数据
size_of_groups = [12, 11, 3, 30]
# 生成饼图
plt.pie(size_of_groups)
plt.show()
22. 树图
树图主要用来可视化树形数据结构,是一种特殊的层次类型,具有唯一的根节点,左子树,和右子树。
import pandas as pd
from matplotlib import pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage
# 读取数据
df = pd.read_csv('mtcars.csv')
df = df.set_index('model')
# 计算每个样本之间的距离
Z = linkage(df, 'ward')
# 绘图
dendrogram(Z, leaf_rotation=90, leaf_font_size=8, labels=df.index)
# 显示
plt.show()
25. 面积图
面积图和折线图非常相似,区别在于和x坐标轴间是否被颜色填充。
import matplotlib.pyplot as plt
# 创建数据
x = range(1, 6)
y = [1, 4, 6, 8, 4]
# 生成图表
plt.fill_between(x, y)
plt.show()
使用Matplotlib的fill_between()进行绘制,结果如下。
b5d3cdba052058d381dcdcd7c28cb680.png
26. 堆叠面积图
堆叠面积图表示若干个数值变量的数值演变。
每个显示在彼此的顶部,易于读取总数,但较难准确读取每个的值。
import matplotlib.pyplot as plt
# 创建数据
x = range(1, 6)
y1 = [1, 4, 6, 8, 9]
y2 = [2, 2, 7, 10, 12]
y3 = [2, 8, 5, 10, 6]
# 生成图表
plt.stackplot(x, y1, y2, y3, labels=['A', 'B', 'C'])
plt.legend(loc='upper left')
plt.show()
27. 河流图
河流图是一种特殊的流图, 它主要用来表示事件或主题等在一段时间内的变化。
围绕着中心轴显示,且边缘是圆形的,从而形成流动的形状。
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
# 添加数据
x = np.arange(1990, 2020)
y = [np.random.randint(0, 5, size=30) for _ in range(5)]
def gaussian_smooth(x, y, grid, sd):
    """Smooth the samples ``y`` located at ``x`` onto ``grid``.

    One Gaussian kernel with standard deviation ``sd`` is centred on every
    position in ``x``. Each kernel is normalised to sum to 1 over ``grid``,
    scaled by the matching value in ``y``, and the kernels are added up.

    Args:
        x: Sample positions (kernel centres).
        y: Sample values, one per entry of ``x``.
        grid: Positions at which the smoothed curve is evaluated.
        sd: Standard deviation of the Gaussian kernels.

    Returns:
        A 1-D array with one smoothed value per entry of ``grid``.
    """
    # Rows follow the grid, columns follow the samples: (len(grid), len(x))
    weights = np.transpose([stats.norm.pdf(grid, m, sd) for m in x])
    # Normalise each kernel (column) so it sums to 1 over the grid
    weights = weights / weights.sum(0)
    # Scale every kernel by its sample value and sum them per grid point
    return (weights * y).sum(1)
# 自定义颜色
COLORS = ["#D0D1E6", "#A6BDDB", "#74A9CF", "#2B8CBE", "#045A8D"]
# 创建画布
fig, ax = plt.subplots(figsize=(10, 7))
# 生成图表
grid = np.linspace(1985, 2025, num=500)
y_smoothed = [gaussian_smooth(x, y_, grid, 1) for y_ in y]
ax.stackplot(grid, y_smoothed, colors=COLORS, baseline="sym")
# 显示
plt.show()
28. 时间序列图
时间序列图是指能够展示数值演变的所有图表。
比如折线图、柱状图、面积图等等。
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
# 创建数据
my_count = ["France", "Australia", "Japan", "USA", "Germany", "Congo", "China", "England", "Spain", "Greece", "Marocco",
"South Africa", "Indonesia", "Peru", "Chili", "Brazil"]
df = pd.DataFrame({
"country": np.repeat(my_count, 10),
"years": list(range(2000, 2010)) * 16,
"value": np.random.rand(160)
})
# Create the grid: one facet per country, wrapping after 4 columns
g = sns.FacetGrid(df, col='country', hue='country', col_wrap=4, )
# Draw the line in every facet
g = g.map(plt.plot, 'years', 'value')
# Fill the area under each line and set the title of every facet
g = g.map(plt.fill_between, 'years', 'value', alpha=0.2).set_titles("{col_name} country")
# Display the figure
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
data=pd.read_csv('SecondhandHouse_view.csv')
plt.rcParams['font.sans-serif'] = 'SimHei'#让标签可以以中文形式呈现
plt.rcParams['axes.unicode_minus']=False
#任务1:散点图
plt.scatter(data['建筑面积'], data['总价'])#需要的列表数据
plt.xlabel('建筑面积(平米)')
plt.ylabel('总价(万)')
plt.title('二手房建筑面积与房价的关系分析')
plt.savefig('二手房建筑面积与房价的关系分析.png')
plt.show()
#折线图
plt.rcParams['font.sans-serif'] = 'SimHei'
plt.plot(data['建筑面积'], data['总价'])
plt.xlabel('建筑面积(平米)')
plt.ylabel('总价(万)')
plt.title('二手房建筑面积与房价的关系分析')
plt.savefig('二手房建筑面积与房价的关系分析折线图1.png')
plt.show()
#饼状图
plt.rcParams['font.sans-serif'] = 'SimHei'
plt.pie(data['总价'], labels=data['建筑面积'], autopct='%1.1f%%')
plt.title('二手房建筑面积与房价的关系分析')
plt.savefig('二手房建筑面积与房价的关系分析饼状图2.png')
plt.show()
#箱装图
plt.rcParams['font.sans-serif'] = 'SimHei'
plt.boxplot(data['总价'], labels=['总价'])
plt.xlabel('建筑面积(平米)')
plt.ylabel('总价(万)')
plt.title('二手房建筑面积与房价的关系分析')
plt.savefig('二手房建筑面积与房价的关系分析箱装图3.png')
plt.show()
# Task 2: bin the "总价" column into 10 grades using the given intervals and
# draw a bar chart of the number of listings in each grade
data['等级'] = pd.cut(data['总价'], [0, 50, 65, 80, 95, 110, 125, 140, 155, 170, float('inf')], labels=list(range(1, 11)))
grade_counts = data['等级'].value_counts().sort_index()
# grade_counts is a Series: its index (the grade) is the x axis and its
# values are the bar heights, so no x=/y= column names are passed
grade_counts.plot(kind='bar')
# pandas draws the bars at positions 0..9 and already labels them with the
# grades 1..10; only rotate those labels instead of moving the ticks
plt.xticks(rotation=45)
plt.xlabel('等级')
plt.ylabel('二手房出售数量')
plt.savefig('柱状图.png')
plt.show()
# 任务3:绘制不同区域二手房平均房价与出售数量的组合图
# 统计不同区域的二手房总价的平均值和二手房出售数量;二手房平均价使用柱状图,
# 二手房出售数量使用折线图,其中x轴数值为二手房区域;由于二手房平均价与出售数量的数值差距过大,
# 设定双y轴与对应的轴标签。
# 统计不同区域的二手房总价的平均值和二手房出售数量
# Per-region statistics of the "总价" column: mean value and number of rows
average_prices = data.groupby('区域')['总价'].mean()
sale_quantities = data.groupby('区域')['总价'].count()
# NOTE(review): FontProperties is not used in the visible part of this script
from matplotlib.font_manager import FontProperties
# Create the figure and the primary axes
fig, ax1 = plt.subplots()
plt.rcParams['font.sans-serif']='SimHei' # font setting so Chinese text can be displayed
plt.rcParams['axes.unicode_minus']=False
# Bar chart on the left y axis (average price per region)
ax1.bar(average_prices.index, average_prices, alpha=0.5)
ax1.set_ylabel('二手房平均价', color='blue')
ax1.tick_params(axis='y', labelcolor='blue')
# Second y axis sharing the same x axis
ax2 = ax1.twinx()
# Line chart on the right y axis (number of listings per region)
ax2.plot(sale_quantities.index, sale_quantities, color='red', marker='o')
ax2.set_ylabel('二手房出售数量', color='red')
ax2.tick_params(axis='y', labelcolor='red')
# Label the x ticks with the region names
plt.xticks(range(len(average_prices)), average_prices.index)
# One legend per axes, placed in opposite corners
ax1.legend(['平均价'], loc='upper left')
ax2.legend(['出售数量'], loc='upper right')
# Display the figure
plt.show()
import numpy as np
import matplotlib.pyplot as plt
data = np.load('国民经济核算季度数据.npz', allow_pickle=True)
columns = data['columns']#表头有多少类别
values = data['values']#对应上面的类别分别是什么数据
print(columns)
print(values)
print(data['values'].shape)#多少类别
import matplotlib.pyplot as plt
import matplotlib as mpl
import random
"""
常见的可视化形式:
1,统计图:直方图、折线图、饼图
2,分布图:热力图、散点图、气泡图
数据可视化工具:
1,分析工具:pandas,SciPy , numpy , sklearn
2,绘图工具:matplotlib, Pychart, reportlab
3,平台工具:Jupyter Notebook, PyCharm
"""
x = [1, 2]
y = [-3, 4]
plt.rcParams['font.sans-serif'] = ['SimHei'] # 显示中文
plt.rcParams['axes.unicode_minus'] = False
plt.title('柱状图')
plt.bar(x, y)
plt.show()
#plt.rcParams['lines.linewidth'] = 10
#plt.rcParams['lines.linestyle'] = '--'#虚线
plt.title('虚线图')
plt.plot(x, y)
plt.show()
# Height data
height1 = [168, 155, 160, 143, 170, 160, 193, 170, 190, 160, 143, 170, 160, 193, 170, 190]
# Bin edges every 5 units. The stop value is 196 so the last edge (195) lies
# above the largest sample (193); with a last edge of 190 those samples were
# left out of the histogram.
bins1 = range(110, 196, 5)
plt.title('直方图')
# Draw the histogram
plt.hist(height1, bins=bins1)
plt.show()
# 数据
classes = ['c1', 'c2', 'c3']
score = [70, 90, 88]
#图形配置
plt.title('条形图') #标题
plt.xlabel('班级')
plt.ylabel('成绩')
# 条形图
plt.bar(classes, score)
plt.show()
# 数据
year1 = range(2005, 2020)
height2 = [168, 155, 160, 143, 170, 160, 193, 170, 190, 160, 143, 170, 160, 193, 170]
plt.title('折线图')
plt.plot(year1, height2)
plt.show()
# 数据
labels = ['房贷', '购车', '教育', '饮食']
data = [4000, 2000, 6000, 1200]
plt.title('饼图')
plt.pie(data, labels=labels, autopct='%1.1f%%') # autopct='%1.1f%%'为保留一位小数
plt.show()
# Data: one pair per product (presumably [price, sales], matching the axis
# labels below)
data = [[12.2, 23.4], [14.5, 11.4], [15.8, 22.9]]
products = ['牙膏', '洗衣粉', '衣服']
x = [item[0] for item in data]
# y is the second element of each pair; reading item[0] again would make
# y a copy of x and put every point on the diagonal
y = [item[1] for item in data]
plt.title('散点图')
plt.scatter(x, y)
plt.xlabel('价格(元)')
plt.ylabel('销售(件)')
# Annotate every point with its product name at the point's own coordinates
for point_x, point_y, product in zip(x, y, products):
    plt.text(point_x, point_y, product)
plt.show()
# 数据
data = [88, 78, 68, 79, 90, 89, 67, 76, 98, 30, 30]
plt.title('箱线图')
plt.boxplot(data)
plt.show()
# 极径和角度数据
r = [1, 2, 3, 4, 5] # 极径
theta = [0.0, 1.57, 3.14, 4.71, 6.28]
ax = plt.subplot(111, projection='polar') # 指定坐标轴为极坐标轴
plt.plot(theta, r) # 绘制极线图
# 指定坐标轴为极坐标轴
ax1 = plt.subplot(111, projection='polar')
# 绘制极坐标轴的示例
ax1.plot([1, 2, 3, 4, 5])
ax1.scatter([0.1, 0.2, 0.3, 0.4, 0.5], [0.6, 0.4, 0.2, 0.8, 0.3])
plt.title('极线图')
plt.show() # 显示图形
# 数据
year = range(2005, 2020)
height = [168, 155, 160, 143, 170, 160, 193, 170, 190, 160, 143, 170, 160, 193, 170]
plt.title('阶梯图')
plt.step(year, height)
plt.show()
# Data: bar positions, class names and scores
x = [1, 2, 3]
name = ['一班', '二班', '三班']
y = [70, 90, 88]
# Bar chart
plt.bar(x, y)
# Figure configuration
plt.title('成绩柱状图')  # title
plt.xlabel('班级')
plt.ylabel('成绩')
plt.xticks(x, name)  # label each bar with its class name
# Write each score just above the top of its bar
for bar_pos, bar_value in zip(x, y):
    plt.text(bar_pos, bar_value + 1, bar_value)
plt.show()
# Data: scores in three subjects
ch = [72, 80, 66, 77, 92]
math = [62, 92, 72, 75, 88]
eng = [88, 76, 45, 80, 98]
plt.title('堆积图')
# First layer, drawn from the axis
plt.bar(range(1, 6), ch, color='r', label='语文成绩')
# Second layer: bottom=ch stacks it on top of the first layer
plt.bar(range(1, 6), math, bottom=ch, color='g', label='数学成绩')
# Running total of the first two layers, used as the base of the third
chmath = [c + m for c, m in zip(ch, math)]
plt.bar(range(1, 6), eng, bottom=chmath, color='b', label='英语成绩')
# Draw the legend so the label= names given above are actually shown
plt.legend()
plt.show()
# 数据: 三个学科的成绩
c1 = [72, 80, 66]
c2 = [62, 92, 72]
c3 = [88, 76, 45]
name_list = ['语文', '数学', '英语']
width = 0.4 # 柱状图宽度
x = [1, 3, 5] # 柱状图之间的间隔
plt.bar(x, c1, label='c1', fc='r', width=width)
x = [1.4, 3.4, 5.4]
plt.bar(x, c2, label='c2', fc='g', width=width)
x = [1.8, 3.8, 5.8]
plt.bar(x, c3, label='c3', fc='b', width=width)
x = [1.4, 3.4, 5.4]
# 设置横坐标的名称
plt.xticks(x, name_list)
# 设置班级颜色
plt.legend()
plt.title('分块图-三班级成绩图')
plt.xlabel('科目')
plt.ylabel('成绩')
plt.show()
x = [22, 23, 24, 25, 26, 27, 28, 29, 30] # 随机生成年龄
y = [155, 150, 175, 180, 179, 190, 189, 170, 168] # 随机生成身高
z = [60, 66, 58, 76, 90, 89, 77, 88, 98] # 随机生成体重
# 绘制气泡图: s指定气泡的大小
plt.scatter(x, y, s=z)
plt.title('气泡图')
plt.show()
一. 柱形图
# Libraries
import numpy as np
import matplotlib.pyplot as plt
# Create dataset
height = [3, 12, 5, 18, 45]
bars = ('A', 'B', 'C', 'D', 'E')
x_pos = np.arange(len(bars))
# Create bars
plt.bar(x_pos, height)
# Create names on the x-axis
plt.xticks(x_pos, bars)
# Show graphic
plt.show()
二. 三色柱形图
# libraries
import numpy as np
import matplotlib.pyplot as plt
# set width of bars
barWidth = 0.25
# set heights of bars
bars1 = [12, 30, 1, 8, 22]
bars2 = [28, 6, 16, 5, 10]
bars3 = [29, 3, 24, 25, 17]
# Set position of bar on X axis
r1 = np.arange(len(bars1))
r2 = [x + barWidth for x in r1]
r3 = [x + barWidth for x in r2]
# Make the plot
plt.bar(r1, bars1, color='#7f6d5f', width=barWidth, edgecolor='white', label='var1')
plt.bar(r2, bars2, color='#557f2d', width=barWidth, edgecolor='white', label='var2')
plt.bar(r3, bars3, color='#2d7f5e', width=barWidth, edgecolor='white', label='var3')
# Add xticks on the middle of the group bars
plt.xlabel('group', fontweight='bold')
plt.xticks([r + barWidth for r in range(len(bars1))], ['A', 'B', 'C', 'D', 'E'])
# Create legend & Show graphic
plt.legend()
plt.show()
三. 叠加柱形图
# libraries
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
import pandas as pd
# y-axis in bold
rc('font', weight='bold')
# Values of each group
bars1 = [12, 28, 1, 8, 22]
bars2 = [28, 7, 16, 4, 10]
bars3 = [25, 3, 23, 25, 17]
# Heights of bars1 + bars2
bars = np.add(bars1, bars2).tolist()
# The position of the bars on the x-axis
r = [0,1,2,3,4]
# Names of group and bar width
names = ['A','B','C','D','E']
barWidth = 1
# Create brown bars
plt.bar(r, bars1, color='#7f6d5f', edgecolor='white', width=barWidth)
# Create green bars (middle), on top of the first ones
plt.bar(r, bars2, bottom=bars1, color='#557f2d', edgecolor='white', width=barWidth)
# Create green bars (top)
plt.bar(r, bars3, bottom=bars, color='#2d7f5e', edgecolor='white', width=barWidth)
# Custom X axis
plt.xticks(r, names, fontweight='bold')
plt.xlabel("group")
# Show graphic
plt.show()
四. 叠加百分比柱形图
# libraries
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
import pandas as pd
# Data
r = [0,1,2,3,4]
raw_data = {'greenBars': [20, 1.5, 7, 10, 5], 'orangeBars': [5, 15, 5, 10, 15],'blueBars': [2, 15, 18, 5, 10]}
df = pd.DataFrame(raw_data)
# From raw value to percentage
totals = [i+j+k for i,j,k in zip(df['greenBars'], df['orangeBars'], df['blueBars'])]
greenBars = [i / j * 100 for i,j in zip(df['greenBars'], totals)]
orangeBars = [i / j * 100 for i,j in zip(df['orangeBars'], totals)]
blueBars = [i / j * 100 for i,j in zip(df['blueBars'], totals)]
# plot
barWidth = 0.85
names = ('A','B','C','D','E')
# Create green Bars
plt.bar(r, greenBars, color='#b5ffb9', edgecolor='white', width=barWidth)
# Create orange Bars
plt.bar(r, orangeBars, bottom=greenBars, color='#f9bc86', edgecolor='white', width=barWidth)
# Create blue Bars
plt.bar(r, blueBars, bottom=[i+j for i,j in zip(greenBars, orangeBars)], color='#a3acff', edgecolor='white', width=barWidth)
# Custom x axis
plt.xticks(r, names)
plt.xlabel("group")
# Show graphic
plt.show()
五. 直方图
# libraries & dataset
import seaborn as sns
import matplotlib.pyplot as plt
# set a grey background (use sns.set_theme() if seaborn version 0.11.0 or above)
sns.set(style="darkgrid")
df = sns.load_dataset("iris")
sns.histplot(data=df, y="sepal_length")
plt.show()
六. 紫色柱形图
# libraries
import numpy as np
import matplotlib.pyplot as plt
# create dataset
height = [3, 12, 5, 18, 45]
bars = ('A', 'B', 'C', 'D', 'E')
x_pos = np.arange(len(bars))
# Create bars and choose color
plt.bar(x_pos, height, color = (0.5,0.1,0.5,0.6))
# Add title and axis names
plt.title('My title')
plt.xlabel('categories')
plt.ylabel('values')
# Create names on the x axis
plt.xticks(x_pos, bars)
# Show graph
plt.show()
七.带条纹的条形图
# Libraries
import numpy as np
import matplotlib.pyplot as plt
# Create dataset
height = [2, 5, 4, 6]
bars = ('A', 'B', 'C', 'D')
x_pos = np.arange(len(bars))
# Create bars; the returned container is iterated bar by bar below
figure = plt.bar(x_pos, height)
# One hatch pattern per bar
hatches = ['-', '/', '||', '///']
# Loop over bars and assign each one its hatch pattern
for bar, hatch in zip(figure, hatches):
    bar.set_hatch(hatch)
# Create names on the x-axis
plt.xticks(x_pos, bars)
# Show graphic
plt.show()
八、带标签的不同色条形图
# libraries
import numpy as np
import matplotlib.pyplot as plt
# width of the bars
barWidth = 0.3
# Choose the height of the blue bars
bars1 = [10, 9, 2]
# Choose the height of the cyan bars
bars2 = [10.8, 9.5, 4.5]
# Choose the height of the error bars (bars1)
yer1 = [0.5, 0.4, 0.5]
# Choose the height of the error bars (bars2)
yer2 = [1, 0.7, 1]
# The x position of bars
r1 = np.arange(len(bars1))
r2 = [x + barWidth for x in r1]
# Create blue bars
plt.bar(r1, bars1, width = barWidth, color = 'blue', edgecolor = 'black', yerr=yer1, capsize=7, label='poacee')
# Create cyan bars
plt.bar(r2, bars2, width = barWidth, color = 'cyan', edgecolor = 'black', yerr=yer2, capsize=7, label='sorgho')
# general layout
plt.xticks([r + barWidth for r in range(len(bars1))], ['cond_A', 'cond_B', 'cond_C'])
plt.ylabel('height')
plt.legend()
# Show graphic
plt.show()
九、详细的条形图
# library
import matplotlib.pyplot as plt
# Bar heights for the three series, plus all heights in one list
barWidth = 0.9
bars1 = [3, 3, 1]
bars2 = [4, 2, 3]
bars3 = [4, 6, 7, 10, 4, 4]
bars4 = bars1 + bars2 + bars3
# The X position of bars, in the same order as the heights above
r1 = [1,5,9]
r2 = [2,6,10]
r3 = [3,4,7,8,11,12]
r4 = r1 + r2 + r3
# Create barplot
plt.bar(r1, bars1, width = barWidth, color = (0.3,0.1,0.4,0.6), label='Alone')
plt.bar(r2, bars2, width = barWidth, color = (0.3,0.5,0.4,0.6), label='With Himself')
plt.bar(r3, bars3, width = barWidth, color = (0.3,0.9,0.4,0.6), label='With other genotype')
# Note: the barplot could be created easily. See the barplot section for other examples.
# Create legend
plt.legend()
# Text below each barplot with a rotation at 90°. The ticks sit exactly on
# the bar centres (the sorted positions 1..12) rather than 0.1 to their left.
plt.xticks(sorted(r4), ['DD', 'with himself', 'with DC', 'with Silur', 'DC', 'with himself', 'with DD', 'with Silur', 'Silur', 'with himself', 'with DD', 'with DC'], rotation=90)
# Create labels
label = ['n = 6', 'n = 25', 'n = 13', 'n = 36', 'n = 30', 'n = 11', 'n = 16', 'n = 37', 'n = 14', 'n = 4', 'n = 31', 'n = 34']
# Text on the top of each bar; positions, heights and labels share one order
for bar_x, bar_top, bar_text in zip(r4, bars4, label):
    plt.text(x = bar_x - 0.5, y = bar_top + 0.1, s = bar_text, size = 6)
# Adjust the margins
plt.subplots_adjust(bottom= 0.2, top = 0.98)
# Show graphic
plt.show()
十、色块
# library
import seaborn as sns
import pandas as pd
import numpy as np
# Create a dataset
df = pd.DataFrame(np.random.random((5,5)), columns=["a","b","c","d","e"])
# Default heatmap: just a visualization of this square matrix
sns.heatmap(df)
十一、带数值的色块
# libraries
import seaborn as sns
import pandas as pd
import numpy as np
# Create a dataset
df = pd.DataFrame(np.random.random((10,10)), columns=["a","b","c","d","e","f","g","h","i","j"])
# plot a heatmap with annotation
sns.heatmap(df, annot=True, annot_kws={"size": 7})
十二、散点图
# libraries
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns
# Create data frame with randomly selected x and y positions
df = pd.DataFrame(np.random.random((100,2)), columns=["x","y"])
# Add a column: the color depends on x and y values, but you can use any function you want
value=(df['x']>0.2) & (df['y']>0.4)
df['color']= np.where( value==True , "#9b59b6", "#3498db")
# plot
sns.regplot(data=df, x="x", y="y", fit_reg=False, scatter_kws={'facecolors':df['color']})
plt.show()
十三、颜色渐变的散点图
# Libraries
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# create data
x = np.random.rand(80) - 0.5
y = x+np.random.rand(80)
z = x+np.random.rand(80)
df = pd.DataFrame({'x':x, 'y':y, 'z':z})
# Plot with palette
sns.lmplot( x='x', y='y', data=df, fit_reg=False, hue='x', legend=False, palette="Blues")
plt.show()
# reverse palette
sns.lmplot( x='x', y='y', data=df, fit_reg=False, hue='x', legend=False, palette="Blues_r")
plt.show()
19
十四、箱线图(seaborn 主题样式对比)
# libraries
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
# Data
data = np.random.normal(size=(20, 6)) + np.arange(6) / 2
# Proposed themes: darkgrid, whitegrid, dark, white, and ticks
sns.set_style("whitegrid")
sns.boxplot(data=data)
plt.title("whitegrid")
plt.show()
sns.set_style("darkgrid")
sns.boxplot(data=data);
plt.title("darkgrid")
plt.show()
sns.set_style("white")
sns.boxplot(data=data);
plt.title("white")
plt.show()
sns.set_style("dark")
sns.boxplot(data=data);
plt.title("dark")
plt.show()
sns.set_style("ticks")
sns.boxplot(data=data);
plt.title("ticks")
plt.show()
十五、带点折线图
# library and dataset
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
# Create data
df=pd.DataFrame({'x_axis': range(1,101), 'y_axis': np.random.randn(100)*15+range(1,101), 'z': (np.random.randn(100)*15+range(1,101))*2 })
# plot with matplotlib
plt.plot( 'x_axis', 'y_axis', data=df, marker='o', color='mediumvioletred')
plt.show()
十六、对比折线图
# Libraries and data
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
df=pd.DataFrame({'x_values': range(1,11), 'y_values': np.random.randn(10) })
# Draw plot
plt.plot( 'x_values', 'y_values', data=df, color='skyblue')
plt.show()
# Draw line chart by modifiying transparency of the line
plt.plot( 'x_values', 'y_values', data=df, color='skyblue', alpha=0.3)
# Show plot
plt.show()
十七、虚线折线图
# Libraries and data
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
df=pd.DataFrame({'x_values': range(1,11), 'y_values': np.random.randn(10) })
# Draw line chart with dashed line
plt.plot( 'x_values', 'y_values', data=df, linestyle='dashed')
# Show graph
plt.show()
十八、多种颜色的折线图
# libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Make a data frame: one x column and ten noisy series y1..y10
df = pd.DataFrame({
    'x': range(1, 11),
    'y1': np.random.randn(10),
    'y2': np.random.randn(10) + range(1, 11),
    'y3': np.random.randn(10) + range(11, 21),
    'y4': np.random.randn(10) + range(6, 16),
    'y5': np.random.randn(10) + range(4, 14) + (0, 0, 0, 0, 0, 0, 0, -3, -8, -6),
    'y6': np.random.randn(10) + range(2, 12),
    'y7': np.random.randn(10) + range(5, 15),
    'y8': np.random.randn(10) + range(4, 14),
    'y9': np.random.randn(10) + range(4, 14),
    'y10': np.random.randn(10) + range(2, 12),
})
# Change the style of plot. Matplotlib 3.6 renamed the bundled seaborn styles
# to 'seaborn-v0_8-*'; fall back to the old name on releases without it.
darkgrid_style = 'seaborn-v0_8-darkgrid'
if darkgrid_style not in plt.style.available:
    darkgrid_style = 'seaborn-darkgrid'
plt.style.use(darkgrid_style)
# Create a color palette
palette = plt.get_cmap('Set1')
# Plot multiple lines, one per y column, each with its own palette colour
for num, column in enumerate(df.drop('x', axis=1), start=1):
    plt.plot(df['x'], df[column], marker='', color=palette(num), linewidth=1, alpha=0.9, label=column)
# Add legend
plt.legend(loc=2, ncol=2)
# Add titles
plt.title("A (bad) Spaghetti plot", loc='left', fontsize=12, fontweight=0, color='orange')
plt.xlabel("Time")
plt.ylabel("Score")
# Show the graph
plt.show()
十九、几种线
plt.plot( [1,1.1,1,1.1,1], linestyle='-' , linewidth=4)
plt.text(1.5, 1.3, "linestyle = '-' ", horizontalalignment='left', size='medium', color='C0', weight='semibold')
plt.plot( [2,2.1,2,2.1,2], linestyle='--' , linewidth=4 )
plt.text(1.5, 2.3, "linestyle = '--' ", horizontalalignment='left', size='medium', color='C1', weight='semibold')
plt.plot( [3,3.1,3,3.1,3], linestyle='-.' , linewidth=4 )
plt.text(1.5, 3.3, "linestyle = '-.' ", horizontalalignment='left', size='medium', color='C2', weight='semibold')
plt.plot( [4,4.1,4,4.1,4], linestyle=':' , linewidth=4 )
plt.text(1.5, 4.3, "linestyle = ':' ", horizontalalignment='left', size='medium', color='C3', weight='semibold')
plt.axis('off')
plt.show()
二十、几种折线
# libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Data
df=pd.DataFrame({'x_values': range(1,11), 'y1_values': np.random.randn(10), 'y2_values': np.random.randn(10)+range(1,11), 'y3_values': np.random.randn(10)+range(11,21) })
# multiple line plots
plt.plot( 'x_values', 'y1_values', data=df, marker='o', markerfacecolor='blue', markersize=12, color='skyblue', linewidth=4)
plt.plot( 'x_values', 'y2_values', data=df, marker='', color='olive', linewidth=2)
plt.plot( 'x_values', 'y3_values', data=df, marker='', color='olive', linewidth=2, linestyle='dashed', label="toto")
# show legend
plt.legend()
# show graph
plt.show()
二十一、小倍数线图
# libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Make a data frame: one x column and nine noisy series y1..y9
df = pd.DataFrame({
    'x': range(1, 11),
    'y1': np.random.randn(10),
    'y2': np.random.randn(10) + range(1, 11),
    'y3': np.random.randn(10) + range(11, 21),
    'y4': np.random.randn(10) + range(6, 16),
    'y5': np.random.randn(10) + range(4, 14) + (0, 0, 0, 0, 0, 0, 0, -3, -8, -6),
    'y6': np.random.randn(10) + range(2, 12),
    'y7': np.random.randn(10) + range(5, 15),
    'y8': np.random.randn(10) + range(4, 14),
    'y9': np.random.randn(10) + range(4, 14),
})
# Initialize the figure style. Matplotlib 3.6 renamed the bundled seaborn
# styles to 'seaborn-v0_8-*'; fall back to the old name on releases without it.
darkgrid_style = 'seaborn-v0_8-darkgrid'
if darkgrid_style not in plt.style.available:
    darkgrid_style = 'seaborn-darkgrid'
plt.style.use(darkgrid_style)
# create a color palette
palette = plt.get_cmap('Set1')
# One subplot per y column on a 3x3 grid
for num, column in enumerate(df.drop('x', axis=1), start=1):
    # Find the right spot on the plot
    plt.subplot(3, 3, num)
    # Plot the lineplot
    plt.plot(df['x'], df[column], marker='', color=palette(num), linewidth=1.9, alpha=0.9, label=column)
    # Same limits for every chart
    plt.xlim(0, 10)
    plt.ylim(-2, 22)
    # Not ticks everywhere: x labels only on the bottom row (subplots 7-9)
    # and y labels only on the left column (subplots 1, 4, 7). Booleans are
    # used because the string 'off' is no longer interpreted as False.
    if num <= 6:
        plt.tick_params(labelbottom=False)
    if num not in [1, 4, 7]:
        plt.tick_params(labelleft=False)
    # Add title
    plt.title(column, loc='left', fontsize=12, fontweight=0, color=palette(num))
# general title
plt.suptitle("How the 9 students improved\nthese past few days?", fontsize=13, fontweight=0, color='black', style='italic', y=1.02)
# Axis titles, placed in figure coordinates so they refer to the whole grid
# rather than to the data coordinates of the last subplot
plt.figtext(0.5, 0.02, 'Time', ha='center', va='center')
plt.figtext(0.06, 0.5, 'Note', ha='center', va='center', rotation='vertical')
# Show the graph
plt.show()
二十二、基本连通散点图
# Libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Set figure default figure size
plt.rcParams["figure.figsize"] = (10, 6)
# Create a random number generator for reproducibility
rng = np.random.default_rng(1111)
# Get some random points!
x = np.array(range(10))
y = rng.integers(10, 100, 10)
z = y + rng.integers(5, 20, 10)
plt.plot(x, z, linestyle="-", marker="o", label="Income")
plt.plot(x, y, linestyle="-", marker="o", label="Expenses")
plt.legend()
plt.show()
二十三、如何避免使用 python 进行过度绘制
# libraries
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
# Dataset: three groups of 20000 normally distributed points each
df=pd.DataFrame({'x': np.random.normal(10, 1.2, 20000), 'y': np.random.normal(10, 1.2, 20000), 'group': np.repeat('A',20000) })
tmp1=pd.DataFrame({'x': np.random.normal(14.5, 1.2, 20000), 'y': np.random.normal(14.5, 1.2, 20000), 'group': np.repeat('B',20000) })
tmp2=pd.DataFrame({'x': np.random.normal(9.5, 1.5, 20000), 'y': np.random.normal(15.5, 1.5, 20000), 'group': np.repeat('C',20000) })
# Stack the three frames; pd.concat replaces DataFrame.append, which was
# removed in pandas 2.0
df = pd.concat([df, tmp1, tmp2])
# plot: markers only, no connecting line
plt.plot( 'x', 'y', "", data=df, linestyle='', marker='o')
plt.xlabel('Value of X')
plt.ylabel('Value of Y')
plt.title('Overplotting looks like that:', loc='left')
plt.show()
二十四、基本甜甜圈
# library
import matplotlib.pyplot as plt
# create data
size_of_groups=[12,11,3,30]
# Create a pie plot
plt.pie(size_of_groups)
#plt.show()
# add a white circle at the center
my_circle=plt.Circle( (0,0), 0.7, color='white')
p=plt.gcf()
p.gca().add_artist(my_circle)
# show the graph
plt.show()
二十五、自定义甜甜圈
# library
import matplotlib.pyplot as plt
# create data
names = ['groupA', 'groupB', 'groupC', 'groupD']
size = [12,11,3,30]
# Create a circle at the center of the plot
my_circle = plt.Circle( (0,0), 0.7, color='white')
# Give color names
plt.pie(size, labels=names, colors=['red','green','blue','skyblue'])
p = plt.gcf()
p.gca().add_artist(my_circle)
# Show the graph
plt.show()
# library
import matplotlib.pyplot as plt
# create data
names = ['groupA', 'groupB', 'groupC', 'groupD']
size = [12,11,3,30]
# Create a circle at the center of the plot
my_circle = plt.Circle( (0,0), 0.7, color='white')
# Not enough colors --> colors will cycle
plt.pie(size, labels=names, colors=['red','green'])
p = plt.gcf()
p.gca().add_artist(my_circle)
# Show the graph
plt.show()
二十六、改变背景的甜甜圈
# library
import matplotlib.pyplot as plt
# Data
names = 'groupA', 'groupB', 'groupC', 'groupD',
size = [12,11,3,30]
# create a figure and set different background
fig = plt.figure()
fig.patch.set_facecolor('black')
# Change color of text
plt.rcParams['text.color'] = 'white'
# Create a circle at the center of the plot
my_circle=plt.Circle( (0,0), 0.7, color='black')
# Pieplot + circle on it
plt.pie(size, labels=names)
p=plt.gcf()
p.gca().add_artist(my_circle)
plt.show()
二十七、分组甜甜圈
# Libraries
import matplotlib.pyplot as plt
# Make data: I have 3 groups and 7 subgroups
group_names=['groupA', 'groupB', 'groupC']
group_size=[12,11,30]
subgroup_names=['A.1', 'A.2', 'A.3', 'B.1', 'B.2', 'C.1', 'C.2', 'C.3', 'C.4', 'C.5']
subgroup_size=[4,3,5,6,5,10,5,5,4,6]
# Create colors
a, b, c=[plt.cm.Blues, plt.cm.Reds, plt.cm.Greens]
# First Ring (outside)
fig, ax = plt.subplots()
ax.axis('equal')
mypie, _ = ax.pie(group_size, radius=1.3, labels=group_names, colors=[a(0.6), b(0.6), c(0.6)] )
plt.setp( mypie, width=0.3, edgecolor='white')
# Second Ring (Inside)
mypie2, _ = ax.pie(subgroup_size, radius=1.3-0.3, labels=subgroup_names, labeldistance=0.7, colors=[a(0.5), a(0.4), a(0.3), b(0.5), b(0.4), c(0.6), c(0.5), c(0.4), c(0.3), c(0.2)])
plt.setp( mypie2, width=0.4, edgecolor='white')
plt.margins(0,0)
# show it
plt.show()
二十八、棒棒糖图
# libraries
import matplotlib.pyplot as plt
import numpy as np
# create data
x=range(1,41)
values=np.random.uniform(size=40)
# stem function
plt.stem(x, values)
plt.ylim(0, 1.2)
plt.show()
# stem function: If x is not provided, a sequence of numbers is created by python:
plt.stem(values)
plt.show()
二十九、自定义棒棒糖图
# libraries
import matplotlib.pyplot as plt
import numpy as np
# create data
values=np.random.uniform(size=40)
# plot without markers
plt.stem(values, markerfmt=' ')
plt.show()
# change color and shape and size and edges
(markers, stemlines, baseline) = plt.stem(values)
plt.setp(markers, marker='D', markersize=10, markeredgecolor="orange", markeredgewidth=2)
plt.show()
三十、垂直棒棒糖图
# libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Data: 20 groups labelled 'A'..'T', each with one uniform random value
df = pd.DataFrame({
    'group': [chr(code) for code in range(65, 85)],
    'values': np.random.uniform(size=20),
})
# Sort by value so the lollipops appear in increasing order
ordered_df = df.sort_values(by='values')
my_range = range(1, len(df.index) + 1)
# Sticks are horizontal segments from 0 to each value; heads are round markers
plt.hlines(y=my_range, xmin=0, xmax=ordered_df['values'], color='skyblue')
plt.plot(ordered_df['values'], my_range, "o")
# Group names on the y axis, then title and axis labels
plt.yticks(my_range, ordered_df['group'])
plt.title("A vertical lolipop plot", loc='left')
plt.xlabel('Value of the variable')
plt.ylabel('Group')
# Display the figure
plt.show()
三十一、高光棒棒糖图
# libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Data: 20 groups labelled 'A'..'T', each with one uniform random value
df = pd.DataFrame({
    'group': [chr(code) for code in range(65, 85)],
    'values': np.random.uniform(size=20),
})
# Sort by value so the lollipops appear in increasing order
ordered_df = df.sort_values(by='values')
my_range = range(1, len(df.index) + 1)
# Group "B" is emphasised with its own colour and a larger marker
is_highlighted = ordered_df['group'] == 'B'
my_color = np.where(is_highlighted, 'orange', 'skyblue')
my_size = np.where(is_highlighted, 70, 30)
# Faint horizontal sticks plus opaque heads
plt.hlines(y=my_range, xmin=0, xmax=ordered_df['values'], color=my_color, alpha=0.4)
plt.scatter(ordered_df['values'], my_range, color=my_color, s=my_size, alpha=1)
# Group names on the y axis, then title and axis labels
plt.yticks(my_range, ordered_df['group'])
plt.title("What about the B group?", loc='left')
plt.xlabel('Value of the variable')
plt.ylabel('Group')
# Display the figure
plt.show()
三十二、分组棒棒糖图
# libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Data: value2 is value1 plus a small positive random offset
value1 = np.random.uniform(size=20)
value2 = value1 + np.random.uniform(size=20) / 4
df = pd.DataFrame({
    'group': [chr(code) for code in range(65, 85)],
    'value1': value1,
    'value2': value2,
})
# Sort rows by the first value
ordered_df = df.sort_values(by='value1')
my_range = range(1, len(df.index) + 1)
# A grey segment joins the two values of each group
plt.hlines(y=my_range, xmin=ordered_df['value1'], xmax=ordered_df['value2'],
           color='grey', alpha=0.4)
# One marker per value; the labels feed the legend
plt.scatter(ordered_df['value1'], my_range, color='skyblue', alpha=1, label='value1')
plt.scatter(ordered_df['value2'], my_range, color='green', alpha=0.4, label='value2')
plt.legend()
# Group names on the y axis, then title and axis labels
plt.yticks(my_range, ordered_df['group'])
plt.title("Comparison of the value 1 and the value 2", loc='left')
plt.xlabel('Value of the variables')
plt.ylabel('Group')
# Display the figure
plt.show()
三十三、带条件色彩的棒棒糖图
#%%
# libraries
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
# Data: a sine wave over [0, 2*pi] with uniform noise, shifted down by 0.2
x = np.linspace(0, 2*np.pi, 100)
noise = np.random.uniform(size=len(x))
y = np.sin(x) + noise - 0.2
# Orange where y is zero or positive, sky blue where it is negative
my_color = np.where(y >= 0, 'orange', 'skyblue')
# Vertical sticks from the zero line to each point, plus tiny markers on top
plt.vlines(x=x, ymin=0, ymax=y, color=my_color, alpha=0.4)
plt.scatter(x, y, color=my_color, s=1, alpha=1)
# Title and axis labels
plt.title("Evolution of the value of ...", loc='left')
plt.xlabel('Value of the variable')
plt.ylabel('Group')
# Display the figure
plt.show()
三十四、面积图
import numpy as np
import matplotlib.pyplot as plt
# Data: five points at x = 1..5
y = [1, 4, 6, 8, 4]
x = range(1, 6)
# Fill the region between the curve and the x axis
plt.fill_between(x, y)
# Display the figure
plt.show()
# Note: plt.stackplot(x, y) could draw this area as well, but fill_between
# is more convenient for later customisation.
三十五、改善区域图
# libraries
import numpy as np
import matplotlib.pyplot as plt
# Data: fourteen points at x = 1..14
y = [1, 4, 6, 8, 4, 5, 3, 2, 4, 1, 5, 6, 8, 7]
x = range(1, 15)
# First figure: custom fill colour and transparency
plt.fill_between(x, y, color="skyblue", alpha=0.4)
plt.show()
# Second figure: lighter fill with a stronger outline drawn on top
plt.fill_between(x, y, color="skyblue", alpha=0.2)
plt.plot(x, y, color="Slateblue", alpha=0.6)
plt.show()
三十六、区域图表和分面
# libraries
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
# Data: one random value per country and year (16 countries x 10 years)
my_count = ["France", "Australia", "Japan", "USA", "Germany", "Congo", "China", "England",
            "Spain", "Greece", "Marocco", "South Africa", "Indonesia", "Peru", "Chili", "Brazil"]
years = list(range(2000, 2010))
df = pd.DataFrame({
    "country": np.repeat(my_count, len(years)),
    "years": years * len(my_count),
    "value": np.random.rand(len(my_count) * len(years)),
})
# One facet per country, four facets per row
g = sns.FacetGrid(df, col='country', hue='country', col_wrap=4)
# Line on top of a translucent filled area in every facet
g.map(plt.plot, 'years', 'value')
g.map(plt.fill_between, 'years', 'value', alpha=0.2)
# Facet titles show only the country name
g.set_titles("{col_name}")
# Leave room at the top for the overall title. The Text returned by suptitle
# is deliberately not assigned, so `g` stays bound to the FacetGrid.
g.fig.subplots_adjust(top=0.92)
g.fig.suptitle('Evolution of the value of stuff in 16 countries')
# Display the figure
plt.show()
三十七、白色网格区域图
# libraries
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Use seaborn's white-grid style for this figure
sns.set_style("whitegrid")
# Take the first colour of the "muted" palette for the line
blue = sns.color_palette("muted", 1)[0]
# Data: 23 random integers in [8, 20)
x = np.arange(23)
y = np.random.randint(8, 20, 23)
# Thick line with a translucent fill from 0 up to the curve
fig, ax = plt.subplots()
ax.plot(x, y, color=blue, lw=3)
ax.fill_between(x, 0, y, alpha=.3)
# x spans exactly the data, y starts at 0, one tick per data point
ax.set(xlim=(0, len(x) - 1), ylim=(0, None), xticks=x)
# Display the figure
plt.show()
三十八、基本堆叠区域图
# libraries
import numpy as np
import matplotlib.pyplot as plt
series_labels = ['A', 'B', 'C']
# --- FORMAT 1: all series passed as one nested list
x = range(1, 6)
y = [[1, 4, 6, 8, 9], [2, 2, 7, 10, 12], [2, 8, 5, 10, 6]]
plt.stackplot(x, y, labels=series_labels)
plt.legend(loc='upper left')
plt.show()
# --- FORMAT 2: each series passed as its own argument
x = range(1, 6)
y1, y2, y3 = [1, 4, 6, 8, 9], [2, 2, 7, 10, 12], [2, 8, 5, 10, 6]
plt.stackplot(x, y1, y2, y3, labels=series_labels)
plt.legend(loc='upper left')
plt.show()
三十九、海洋风格的堆叠区域图
# libraries
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme to the matplotlib figure
sns.set_theme()
# Data: three series over x = 1..5
x = range(1, 6)
y = [[1, 4, 6, 8, 9], [2, 2, 7, 10, 12], [2, 8, 5, 10, 6]]
series_labels = ['A', 'B', 'C']
# Stacked area chart with a legend
plt.stackplot(x, y, labels=series_labels)
plt.legend(loc='upper left')
plt.show()
四十、基线选项堆叠面积图
# libraries
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Data: five random series that trend upwards along a shared x axis
X = np.arange(0, 10, 1)
Y = X + 5 * np.random.random((5, X.size))
# The four baseline modes compared below
baseline = ["zero", "sym", "wiggle", "weighted_wiggle"]
# One subplot per baseline mode on a 2x2 grid
for n, v in enumerate(baseline):
    ax = plt.subplot(2, 2, n + 1)
    ax.stackplot(X, *Y, baseline=v)
    ax.set_title(v)
    if n < 2:
        # Top row only: hide the x tick labels. This must be a real boolean
        # and must run after the subplot exists so it targets this axes.
        ax.tick_params(labelbottom=False)
# Lay out once after all subplots are drawn, then display the figure
plt.tight_layout()
plt.show()
01. 小提琴图
小提琴图可以将一组或多组数据的数值变量分布可视化。
与有时会隐藏数据特征的箱形图相比,小提琴图值得更多关注。
import seaborn as sns
import matplotlib.pyplot as plt
# Load the iris dataset (cached under the local 'seaborn-data' directory)
df = sns.load_dataset('iris', data_home='seaborn-data', cache=True)
# Violin plot of sepal length for each species
sns.violinplot(data=df, x="species", y="sepal_length")
plt.show()
02. 核密度估计图
核密度估计图其实是对直方图的一个自然拓展。
可以可视化一个或多个组的数值变量的分布,非常适合大型数据集。
import seaborn as sns
import matplotlib.pyplot as plt
# Load the iris dataset (cached under the local 'seaborn-data' directory)
df = sns.load_dataset('iris', data_home='seaborn-data', cache=True)
# Kernel density estimate of the sepal width column
sepal_width = df['sepal_width']
sns.kdeplot(sepal_width)
plt.show()
03. 直方图
直方图,可视化一组或多组数据的分布情况。
import seaborn as sns
import matplotlib.pyplot as plt
# Load the iris dataset (cached under the local 'seaborn-data' directory)
df = sns.load_dataset('iris', data_home='seaborn-data', cache=True)
# Plain histogram of sepal length (no KDE curve, no rug).
# sns.distplot is deprecated and removed from recent seaborn releases;
# histplot is its replacement for histograms.
sns.histplot(data=df, x="sepal_length", kde=False)
plt.show()
04. 箱形图
箱形图,可视化一组或多组数据的分布情况。
可以快速获得中位数、四分位数和异常值,但也隐藏数据集的各个数据点。
import seaborn as sns
import matplotlib.pyplot as plt
# Load the iris dataset (cached under the local 'seaborn-data' directory)
df = sns.load_dataset('iris', data_home='seaborn-data', cache=True)
# Box plot of sepal length for each species
sns.boxplot(data=df, x="species", y="sepal_length")
plt.show()
06. 散点图
散点图,显示2个数值变量之间的关系。
import seaborn as sns
import matplotlib.pyplot as plt
# Load the iris dataset (cached under the local 'seaborn-data' directory)
df = sns.load_dataset('iris', data_home='seaborn-data', cache=True)
# Scatter plot of sepal width against sepal length (regplot also fits a line)
sns.regplot(data=df, x="sepal_length", y="sepal_width")
plt.show()
08. 相关性图
相关性图或相关矩阵图,分析每对数据变量之间的关系。
相关性可视化为散点图,对角线用直方图或密度图表示每个变量的分布。
import seaborn as sns
import matplotlib.pyplot as plt
# Load the iris dataset (cached under the local 'seaborn-data' directory)
df = sns.load_dataset('iris', data_home='seaborn-data', cache=True)
# Grid of pairwise relationships between the numeric columns
sns.pairplot(data=df)
plt.show()
10. 连接散点图
连接散点图就是一个线图,其中每个数据点由圆形或任何类型的标记展示。
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Data: x = 1..9, y = scaled Gaussian noise added to the same 1..9 ramp
x_values = range(1, 10)
df = pd.DataFrame({'x_axis': x_values, 'y_axis': np.random.randn(9) * 80 + x_values})
# Solid line with a round marker at every data point
plt.plot('x_axis', 'y_axis', data=df, linestyle='-', marker='o')
plt.show()
11. 二维密度图
二维密度图或二维直方图,可视化两个定量变量的组合分布。
它们总是在X轴上表示一个变量,另一个在Y轴上,就像散点图。
然后计算二维空间特定区域内的次数,并用颜色渐变表示。
常见的形状变化有:六边形(hexbin chart)、正方形(2D histogram)、核密度(2D density plot)或等高线(contour plot)。
import numpy as np
import matplotlib.pyplot as plt
# Import the estimator from scipy.stats directly: the scipy.stats.kde
# submodule is a deprecated private namespace.
from scipy.stats import gaussian_kde
# Data: 200 points drawn from a correlated 2D normal distribution
data = np.random.multivariate_normal([0, 0], [[1, 0.5], [0.5, 3]], 200)
x, y = data.T
# Canvas with six side-by-side panels
fig, axes = plt.subplots(ncols=6, nrows=1, figsize=(21, 5))
# Panel 1: raw scatter plot
axes[0].set_title('Scatterplot')
axes[0].plot(x, y, 'ko')
# Panel 2: hexagonal binning
nbins = 20
axes[1].set_title('Hexbin')
axes[1].hexbin(x, y, gridsize=nbins, cmap=plt.cm.BuGn_r)
# Panel 3: 2D histogram
axes[2].set_title('2D Histogram')
axes[2].hist2d(x, y, bins=nbins, cmap=plt.cm.BuGn_r)
# Gaussian KDE evaluated on an nbins x nbins grid spanning the data range
k = gaussian_kde(data.T)
xi, yi = np.mgrid[x.min():x.max():nbins * 1j, y.min():y.max():nbins * 1j]
zi = k(np.vstack([xi.flatten(), yi.flatten()]))
# Reshape once; the three density panels below share this grid
density = zi.reshape(xi.shape)
# Panel 4: density map
axes[3].set_title('Calculate Gaussian KDE')
axes[3].pcolormesh(xi, yi, density, shading='auto', cmap=plt.cm.BuGn_r)
# Panel 5: density map with Gouraud shading
axes[4].set_title('2D Density with shading')
axes[4].pcolormesh(xi, yi, density, shading='gouraud', cmap=plt.cm.BuGn_r)
# Panel 6: shaded density map with contour lines on top
axes[5].set_title('Contour')
axes[5].pcolormesh(xi, yi, density, shading='gouraud', cmap=plt.cm.BuGn_r)
axes[5].contour(xi, yi, density)
plt.show()
12. 条形图
条形图表示多个明确的变量的数值关系。每个变量都为一个条形。条形的大小代表其数值。
import numpy as np
import matplotlib.pyplot as plt
# Data: five category names and their (fixed) bar heights
bars = ('A', 'B', 'C', 'D', 'E')
height = [3, 12, 5, 18, 45]
# Integer positions 0..4 for the bars along the x axis
y_pos = np.arange(len(bars))
# Draw the bars, then replace the numeric ticks with the category names
plt.bar(y_pos, height)
plt.xticks(y_pos, bars)
# Display the figure
plt.show()
13. 雷达图
雷达图,可以可视化多个定量变量的一个或多个系列的值。
每个变量都有自己的轴,所有轴都连接在图形的中心。
import matplotlib.pyplot as plt
import pandas as pd
from math import pi
# Data: four groups measured on five variables
df = pd.DataFrame({
    'group': ['A', 'B', 'C', 'D'],
    'var1': [38, 1.5, 30, 4],
    'var2': [29, 10, 9, 34],
    'var3': [8, 39, 23, 24],
    'var4': [7, 31, 33, 14],
    'var5': [28, 15, 32, 14],
})
# Every column except the first ('group') becomes one radar axis
categories = list(df)[1:]
N = len(categories)
# One evenly spaced angle per axis; the first angle is repeated to close the polygon
angles = [n / float(N) * 2 * pi for n in range(N)]
angles += angles[:1]
# Polar axes with the first axis at the top, proceeding clockwise
ax = plt.subplot(111, polar=True)
ax.set_theta_offset(pi / 2)
ax.set_theta_direction(-1)
# Axis names around the circle, radial gridline labels and radial range
plt.xticks(angles[:-1], categories)
ax.set_rlabel_position(0)
plt.yticks([10, 20, 30], ["10", "20", "30"], color="grey", size=7)
plt.ylim(0, 40)
# Draw the first two groups, each as an outline plus a translucent fill
for row, legend_label, fill_color in ((0, "group A", 'b'), (1, "group B", 'r')):
    values = df.loc[row].drop('group').values.flatten().tolist()
    # Repeat the first value so the outline closes
    values += values[:1]
    ax.plot(angles, values, linewidth=1, linestyle='solid', label=legend_label)
    ax.fill(angles, values, fill_color, alpha=0.1)
# Legend
plt.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))
# Display the figure
plt.show()
14. 词云图
词云图是文本数据的视觉表示。
单词通常是单个的,每个单词的重要性以字体大小或颜色表示。
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# Source text; words that repeat are counted more than once
text = "Python Python Python Matplotlib Chart Wordcloud Boxplot"
# Build a 480x480 word cloud from the text
cloud_builder = WordCloud(width=480, height=480, margin=0)
wordcloud = cloud_builder.generate(text)
# Show the cloud as an image, without axes or margins
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.margins(x=0, y=0)
plt.show()
15. 平行坐标图
平行坐标图能够比较不同系列在相同属性上的数值情况。
Pandas可能是绘制平行坐标图的最佳方式。
import seaborn as sns
import matplotlib.pyplot as plt
from pandas.plotting import parallel_coordinates
# Load the iris dataset (cached under the local 'seaborn-data' directory)
data = sns.load_dataset('iris', data_home='seaborn-data', cache=True)
# One line per row, coloured by the 'species' column using the Set2 colormap
species_cmap = plt.get_cmap("Set2")
parallel_coordinates(data, 'species', colormap=species_cmap)
# Display the figure
plt.show()
17. 径向柱图
径向柱图同样也是条形图的变形,但是使用极坐标而不是直角坐标系。
绘制起来有点麻烦,而且比柱状图准确度低,但更引人注目。
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Data: 50 named items with random integer values in [10, 100)
df = pd.DataFrame(
    {
        'Name': ['item ' + str(i) for i in range(1, 51)],
        'Value': np.random.randint(low=10, high=100, size=50),
    })
# Sort so the bars grow around the circle
df = df.sort_values(by=['Value'])
# Polar canvas with the axes hidden
plt.figure(figsize=(20, 10))
ax = plt.subplot(111, polar=True)
plt.axis('off')
# Chart parameters
upperLimit = 100  # not used in the height computation below
lowerLimit = 30   # radius at which every bar starts (inner hole)
labelPadding = 4  # gap between the end of a bar and its label
# Largest value (named max_value so the builtin max() is not shadowed)
max_value = df['Value'].max()
# Rescale the values linearly into bar heights
slope = (max_value - lowerLimit) / max_value
heights = slope * df.Value + lowerLimit
# Each bar gets an equal angular width
width = 2 * np.pi / len(df.index)
# Angle of each bar
indexes = list(range(1, len(df.index) + 1))
angles = [element * width for element in indexes]
# Draw the bars
bars = ax.bar(
    x=angles,
    height=heights,
    width=width,
    bottom=lowerLimit,
    linewidth=2,
    edgecolor="white",
    color="#61a4b2",
)
# Add one label per bar, just beyond its outer end
for bar, angle, label in zip(bars, angles, df["Name"]):
    # Rotate the label to follow its bar
    rotation = np.rad2deg(angle)
    # On the left half of the circle, flip the text so it is not upside down
    if np.pi / 2 <= angle < 3 * np.pi / 2:
        alignment = "right"
        rotation = rotation + 180
    else:
        alignment = "left"
    ax.text(
        x=angle,
        y=lowerLimit + bar.get_height() + labelPadding,
        s=label,
        ha=alignment,
        va='center',
        rotation=rotation,
        rotation_mode="anchor")
plt.show()
19. 维恩图
维恩图,显示不同组之间所有可能的关系。
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
# Region sizes passed to venn2
# NOTE(review): presumably (A only, B only, A and B) per matplotlib-venn's convention - confirm
subset_sizes = (10, 5, 2)
group_labels = ('Group A', 'Group B')
venn2(subsets=subset_sizes, set_labels=group_labels)
# Display the figure
plt.show()
20. 圆环图
圆环图,本质上就是一个饼图,中间切掉了一个区域。
import matplotlib.pyplot as plt
# Data: sizes of the four wedges
size_of_groups = [12, 11, 3, 30]
# Draw an ordinary pie chart first
plt.pie(size_of_groups)
# Cover its centre with a white disc to turn the pie into a ring
my_circle = plt.Circle((0, 0), 0.7, color='white')
plt.gca().add_artist(my_circle)
plt.show()
21. 饼图
饼图,最常见的可视化图表之一。
将圆划分成一个个扇形区域,每个区域代表在整体中所占的比例。
import matplotlib.pyplot as plt
# Data: sizes of the four wedges
size_of_groups = [12, 11, 3, 30]
# Draw the pie chart and display it
plt.pie(x=size_of_groups)
plt.show()
22. 树图
树图主要用来可视化树形数据结构,是一种特殊的层次类型,具有唯一的根节点、左子树和右子树。
import pandas as pd
from matplotlib import pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage
# Load the data and use the 'model' column as the row index
df = pd.read_csv('mtcars.csv', index_col='model')
# Hierarchical clustering of the rows using Ward linkage
Z = linkage(df, 'ward')
# Draw the tree; labels are passed as a plain list because older SciPy
# releases raise an ambiguous-truth-value error for array-like labels.
dendrogram(Z, leaf_rotation=90, leaf_font_size=8, labels=df.index.tolist())
# Display the figure
plt.show()
25. 面积图
面积图和折线图非常相似,区别在于和x坐标轴间是否被颜色填充。
import matplotlib.pyplot as plt
# Data: five points at x = 1..5
y = [1, 4, 6, 8, 4]
x = range(1, 6)
# Fill the region between the curve and the x axis, then display it
plt.fill_between(x, y)
plt.show()
使用Matplotlib的fill_between()进行绘制,结果如下。
b5d3cdba052058d381dcdcd7c28cb680.png
26. 堆叠面积图
堆叠面积图表示若干个数值变量的数值演变。
每个显示在彼此的顶部,易于读取总数,但较难准确读取每个的值。
import matplotlib.pyplot as plt
# Data: three series over x = 1..5
x = range(1, 6)
y1, y2, y3 = [1, 4, 6, 8, 9], [2, 2, 7, 10, 12], [2, 8, 5, 10, 6]
series_labels = ['A', 'B', 'C']
# Stack the series on top of each other and add a legend
plt.stackplot(x, y1, y2, y3, labels=series_labels)
plt.legend(loc='upper left')
plt.show()
27. 河流图
河流图是一种特殊的流图, 它主要用来表示事件或主题等在一段时间内的变化。
围绕着中心轴显示,且边缘是圆形的,从而形成流动的形状。
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
# Data: x holds the years 1990..2019; y holds five series of 30 random
# integers in [0, 5), one series per stream.
x = np.arange(1990, 2020)
y = [np.random.randint(0, 5, size=30) for _ in range(5)]
def gaussian_smooth(x, y, grid, sd):
    """Smooth the samples ``(x, y)`` onto ``grid`` using Gaussian kernels.

    Args:
        x: Sample positions, one per value in ``y``.
        y: Sample values.
        grid: Positions at which the smoothed curve is evaluated.
        sd: Standard deviation of the Gaussian kernel placed on each sample.

    Returns:
        Array with one smoothed value per entry of ``grid``.
    """
    # One Gaussian bump per sample, centred on that sample's position;
    # after the transpose, rows follow ``grid`` and columns follow ``x``.
    weights = np.transpose([stats.norm.pdf(grid, m, sd) for m in x])
    # Normalise each column so every sample's kernel sums to 1 over the grid.
    weights = weights / weights.sum(0)
    # Weight each sample by its kernel and add up the contributions per grid point.
    return (weights * y).sum(1)
# Custom colours, one per stream
COLORS = ["#D0D1E6", "#A6BDDB", "#74A9CF", "#2B8CBE", "#045A8D"]
# Canvas
fig, ax = plt.subplots(figsize=(10, 7))
# Smooth every series onto a 500-point grid that extends past the data range
grid = np.linspace(1985, 2025, num=500)
y_smoothed = []
for y_ in y:
    y_smoothed.append(gaussian_smooth(x, y_, grid, 1))
# Stack the smoothed series symmetrically around the centre line
ax.stackplot(grid, y_smoothed, colors=COLORS, baseline="sym")
# Display the figure
plt.show()
28. 时间序列图
时间序列图是指能够展示数值演变的所有图表。
比如折线图、柱状图、面积图等等。
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
# Data: one random value per country and year (16 countries x 10 years)
my_count = ["France", "Australia", "Japan", "USA", "Germany", "Congo", "China", "England",
            "Spain", "Greece", "Marocco", "South Africa", "Indonesia", "Peru", "Chili", "Brazil"]
years = list(range(2000, 2010))
df = pd.DataFrame({
    "country": np.repeat(my_count, len(years)),
    "years": years * len(my_count),
    "value": np.random.rand(len(my_count) * len(years)),
})
# One facet per country, four facets per row
g = sns.FacetGrid(df, col='country', hue='country', col_wrap=4)
# Line on top of a translucent filled area in every facet
g.map(plt.plot, 'years', 'value')
g.map(plt.fill_between, 'years', 'value', alpha=0.2)
# Facet titles show only the country name
g.set_titles("{col_name}")
# Leave room at the top for the overall title. The Text returned by suptitle
# is deliberately not assigned, so `g` stays bound to the FacetGrid.
g.fig.subplots_adjust(top=0.92)
g.fig.suptitle('Evolution of the value of stuff in 16 countries')
# Display the figure
plt.show()