分布式比赛数据可视化笔记

最新推荐文章于 2024-08-06 13:52:03 发布

dd的博客

最新推荐文章于 2024-08-06 13:52:03 发布

阅读量270

点赞数 5

文章标签：笔记

本文链接：https://blog.csdn.net/qq_74237833/article/details/139011707

版权

import pandas as pd

import matplotlib.pyplot as plt

# Load the second-hand housing listings; the charts below all read `data`.
data = pd.read_csv('SecondhandHouse_view.csv')

# Font setup: SimHei supplies the glyphs for the Chinese labels, and the
# second setting makes matplotlib draw minus signs with the ASCII hyphen.
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['font.sans-serif'] = 'SimHei'

# Task 1: scatter plot of floor area (x) against total price (y).
plt.scatter(x=data['建筑面积'], y=data['总价'])
plt.title('二手房建筑面积与房价的关系分析')
plt.xlabel('建筑面积（平米）')
plt.ylabel('总价（万）')

# Save before show(): the figure is written to disk, then displayed.
plt.savefig('二手房建筑面积与房价的关系分析.png')
plt.show()

# Line chart of floor area against total price.
plt.rcParams['font.sans-serif'] = 'SimHei'

# plt.plot joins the points in row order, so sort the rows by floor area
# first; plotting the raw row order makes the line jump back and forth
# along the x axis.
sorted_by_area = data.sort_values('建筑面积')
plt.plot(sorted_by_area['建筑面积'], sorted_by_area['总价'])

plt.xlabel('建筑面积（平米）')
plt.ylabel('总价（万）')
plt.title('二手房建筑面积与房价的关系分析')
plt.savefig('二手房建筑面积与房价的关系分析折线图1.png')
plt.show()

# Pie chart: total price gives the wedge sizes, floor area the wedge labels.
# NOTE(review): this draws one wedge per row of the CSV - confirm that is the
# intended chart; the task may expect an aggregated column instead.
plt.rcParams['font.sans-serif'] = 'SimHei'
wedge_sizes = data['总价']
wedge_labels = data['建筑面积']
plt.pie(wedge_sizes, labels=wedge_labels, autopct='%1.1f%%')
plt.title('二手房建筑面积与房价的关系分析')
plt.savefig('二手房建筑面积与房价的关系分析饼状图2.png')
plt.show()

# Box plot of the total-price column.
plt.rcParams['font.sans-serif'] = 'SimHei'
plt.boxplot(data['总价'])

# Name the single box through xticks rather than boxplot's `labels=` keyword,
# which newer matplotlib releases rename to `tick_labels`; a box plot places
# its first box at x = 1 by default.
plt.xticks([1], ['总价'])

# Only the price column is drawn, so the former floor-area x label is dropped.
plt.ylabel('总价（万）')
plt.title('二手房建筑面积与房价的关系分析')
plt.savefig('二手房建筑面积与房价的关系分析箱装图3.png')
plt.show()

# Task 2: bin the total price into ten grades and draw a bar chart of how
# many listings fall into each grade.

# Upper edges are inclusive (pandas.cut default): grade 1 is (0, 50],
# grade 2 is (50, 65], ... and grade 10 is everything above 170.
price_bins = [0, 50, 65, 80, 95, 110, 125, 140, 155, 170, float('inf')]
data['等级'] = pd.cut(data['总价'], price_bins, labels=list(range(1, 11)))

# Count listings per grade, ordered by grade number.
grade_counts = data['等级'].value_counts().sort_index()

# A Series bar plot puts the bars at positions 0..9 and already labels them
# with the index (grades 1..10). The old code passed x=/y= (which do not
# apply to a Series) and then moved the ticks to 1..10, which shifted every
# label one bar to the right. Only rotate the existing labels instead.
grade_counts.plot(kind='bar', rot=45)

plt.xlabel('等级')
plt.ylabel('二手房出售数量')
plt.savefig('柱状图.png')
plt.show()

# Task 3: combined chart of average price and number of listings per
# district. Average price is drawn as bars and listing count as a line; the
# two series get separate y axes (sharing one x axis) because their
# magnitudes differ.

# Per-district mean and count of the total-price column.
price_by_district = data.groupby('区域')['总价']
average_prices = price_by_district.mean()
sale_quantities = price_by_district.count()

from matplotlib.font_manager import FontProperties

# Canvas with the primary (left) axis.
fig, ax1 = plt.subplots()

# Chinese font and ASCII minus sign.
plt.rcParams['font.sans-serif']='SimHei'
plt.rcParams['axes.unicode_minus']=False

# Bars on the left axis: average price per district.
ax1.bar(average_prices.index, average_prices, alpha=0.5)
ax1.tick_params(axis='y', labelcolor='blue')
ax1.set_ylabel('二手房平均价', color='blue')

# Line on a second y axis that shares the x axis: listings per district.
ax2 = ax1.twinx()
ax2.plot(sale_quantities.index, sale_quantities, color='red', marker='o')
ax2.tick_params(axis='y', labelcolor='red')
ax2.set_ylabel('二手房出售数量', color='red')

# Label the x ticks with the district names.
plt.xticks(range(len(average_prices)), average_prices.index)

# One legend per axis, placed in opposite corners.
ax1.legend(['平均价'], loc='upper left')
ax2.legend(['出售数量'], loc='upper right')

plt.show()

import numpy as np

import matplotlib.pyplot as plt

# Open the quarterly national-accounts archive.
# NOTE(review): allow_pickle=True lets numpy unpickle object arrays, which can
# execute arbitrary code - only load archives from a trusted source.
data = np.load('国民经济核算季度数据.npz', allow_pickle=True)

# The archive is read through two keys: 'columns' (header names) and
# 'values' (the data belonging to those headers).
columns, values = data['columns'], data['values']

print(columns)
print(values)

# Dimensions of the values array.
print(data['values'].shape)

import matplotlib.pyplot as plt

import matplotlib as mpl

import random

"""

常见的可视化形式：

1，统计图：直方图、折线图、饼图

2，分布图：热力图、散点图、气泡图

数据可视化工具：

1，分析工具：pandas，SciPy , numpy , sklearn

2，绘图工具：matplotlib, Pychart, reportlab

3，平台工具：Jupyter Notebook, PyCharm

"""

x = [1, 2]
y = [-3, 4]

# SimHei renders the Chinese titles; use the ASCII hyphen for the negative
# tick labels.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Bar chart of the two points.
plt.title('柱状图')
plt.bar(x, y)
plt.show()

# Dashed line chart. The style is passed per call so the chart matches its
# title without changing the global line style for every later plot.
plt.title('虚线图')
plt.plot(x, y, linestyle='--')
plt.show()

# Height samples.
height1 = [168, 155, 160, 143, 170, 160, 193, 170, 190, 160, 143, 170, 160, 193, 170, 190]

# Bin edges every 5 units from 110 to 200. The previous upper edge of 190
# left both 193 samples outside the histogram.
bins1 = range(110, 201, 5)

plt.title('直方图')
plt.hist(height1, bins=bins1)
plt.show()

# --- Bar chart: one score per class ---
classes = ['c1', 'c2', 'c3']
score = [70, 90, 88]

plt.bar(classes, score)
plt.title('条形图')
plt.xlabel('班级')
plt.ylabel('成绩')
plt.show()

# --- Line chart: one height value per year, 2005 through 2019 ---
year1 = range(2005, 2020)
height2 = [168, 155, 160, 143, 170, 160, 193, 170, 190, 160, 143, 170, 160, 193, 170]

plt.plot(year1, height2)
plt.title('折线图')
plt.show()

# --- Pie chart: spending per category ---
labels = ['房贷', '购车', '教育', '饮食']
data = [4000, 2000, 6000, 1200]

# autopct='%1.1f%%' prints each share with one decimal place.
plt.pie(data, labels=labels, autopct='%1.1f%%')
plt.title('饼图')
plt.show()

# Each row is [price, sales] for one product; `names` follows the same order.
data = [[12.2, 23.4], [14.5, 11.4], [15.8, 22.9]]
names = ['牙膏', '洗衣粉', '衣服']

x = [item[0] for item in data]
# Sales are the second column; the previous code read item[0] here as well,
# which plotted price against itself.
y = [item[1] for item in data]

plt.title('散点图')
plt.scatter(x, y)
plt.xlabel('价格(元)')
plt.ylabel('销售(件)')

# Write each product name at its own data point.
for px, py, product in zip(x, y, names):
    plt.text(px, py, product)

plt.show()

# --- Box plot of a list of scores ---
data = [88, 78, 68, 79, 90, 89, 67, 76, 98, 30, 30]
plt.boxplot(data)
plt.title('箱线图')
plt.show()

# --- Polar chart ---
r = [1, 2, 3, 4, 5]                       # radii
theta = [0.0, 1.57, 3.14, 4.71, 6.28]     # angles in radians

# Make the axes polar, then draw radius against angle.
ax = plt.subplot(111, projection='polar')
plt.plot(theta, r)

# NOTE(review): subplot 111 is requested a second time; presumably this hands
# back the polar axes created above - confirm for the installed matplotlib.
ax1 = plt.subplot(111, projection='polar')

# Extra example artists on the polar axes: a line and a few scatter points.
ax1.plot([1, 2, 3, 4, 5])
ax1.scatter([0.1, 0.2, 0.3, 0.4, 0.5], [0.6, 0.4, 0.2, 0.8, 0.3])
plt.title('极线图')
plt.show()

# --- Step chart: one height value per year, 2005 through 2019 ---
year = range(2005, 2020)
height = [168, 155, 160, 143, 170, 160, 193, 170, 190, 160, 143, 170, 160, 193, 170]
plt.step(year, height)
plt.title('阶梯图')
plt.show()

# Bar positions, class names shown on the x axis, and the score of each class.
x = [1, 2, 3]
name = ['一班', '二班', '三班']
y = [70, 90, 88]

plt.bar(x, y)

plt.title('成绩柱状图')
plt.xlabel('班级')
plt.ylabel('成绩')
plt.xticks(x, name)  # replace the numeric ticks with the class names

# Print each score one unit above the top of its bar. The loop body has to be
# indented; the flattened original was a syntax error.
for xi, yi in zip(x, y):
    plt.text(xi, yi + 1, yi)

plt.show()

# Scores of three subjects for five students.
ch = [72, 80, 66, 77, 92]
math = [62, 92, 72, 75, 88]
eng = [88, 76, 45, 80, 98]

positions = range(1, 6)
plt.title('堆积图')

# Chinese scores form the bottom layer.
plt.bar(positions, ch, color='r', label='语文成绩')

# Maths is stacked on top of Chinese via bottom=ch.
plt.bar(positions, math, bottom=ch, color='g', label='数学成绩')

# English starts where Chinese + maths ends.
chmath = [c + m for c, m in zip(ch, math)]
plt.bar(positions, eng, bottom=chmath, color='b', label='英语成绩')

# Without this call the label= values above are never displayed.
plt.legend()
plt.show()

# Scores of three classes (c1..c3) in three subjects.
c1 = [72, 80, 66]
c2 = [62, 92, 72]
c3 = [88, 76, 45]
name_list = ['语文', '数学', '英语']

width = 0.4  # bar width

# One bar series per class; each series is shifted right by one bar width so
# the three bars of a subject sit side by side.
series = [
    ([1, 3, 5], c1, 'c1', 'r'),
    ([1.4, 3.4, 5.4], c2, 'c2', 'g'),
    ([1.8, 3.8, 5.8], c3, 'c3', 'b'),
]
for x, scores, class_label, face in series:
    plt.bar(x, scores, label=class_label, fc=face, width=width)

# Subject names go under the middle bar of each group.
x = [1.4, 3.4, 5.4]
plt.xticks(x, name_list)

# The legend maps each colour to its class.
plt.legend()
plt.title('分块图-三班级成绩图')
plt.xlabel('科目')
plt.ylabel('成绩')
plt.show()

# Fixed sample data (hard-coded, not generated): age, height and weight.
x = [22, 23, 24, 25, 26, 27, 28, 29, 30]            # age
y = [155, 150, 175, 180, 179, 190, 189, 170, 168]   # height
z = [60, 66, 58, 76, 90, 89, 77, 88, 98]            # weight

# Bubble chart: a scatter plot whose marker size (s) follows the weight.
plt.title('气泡图')
plt.scatter(x, y, s=z)
plt.show()

一. 柱形图

# Libraries

import numpy as np

import matplotlib.pyplot as plt

# Category names and the height of each bar.
bars = ('A', 'B', 'C', 'D', 'E')
height = [3, 12, 5, 18, 45]

# One integer x position per category.
x_pos = np.arange(len(bars))

# Draw the bars, then swap the numeric ticks for the category names.
plt.bar(x_pos, height)
plt.xticks(x_pos, bars)
plt.show()

二. 三色柱形图

# libraries

import numpy as np

import matplotlib.pyplot as plt

# Width of a single bar.
barWidth = 0.25

# Bar heights of the three series.
bars1 = [12, 30, 1, 8, 22]
bars2 = [28, 6, 16, 5, 10]
bars3 = [29, 3, 24, 25, 17]

# x positions: each series sits one bar width to the right of the previous.
r1 = np.arange(len(bars1))
r2 = [x + barWidth for x in r1]
r3 = [x + barWidth for x in r2]

# Draw the three series with their own colour and legend label.
for positions, heights, shade, tag in (
    (r1, bars1, '#7f6d5f', 'var1'),
    (r2, bars2, '#557f2d', 'var2'),
    (r3, bars3, '#2d7f5e', 'var3'),
):
    plt.bar(positions, heights, color=shade, width=barWidth, edgecolor='white', label=tag)

# Group names go under the middle bar of each group.
group_ticks = [r + barWidth for r in range(len(bars1))]
plt.xticks(group_ticks, ['A', 'B', 'C', 'D', 'E'])
plt.xlabel('group', fontweight='bold')

plt.legend()
plt.show()

三. 叠加柱形图

# libraries

import numpy as np

import matplotlib.pyplot as plt

from matplotlib import rc

import pandas as pd

# Switch the default font weight to bold (a global matplotlib rc setting).
rc('font', weight='bold')

# Values of the three stacked layers.
bars1 = [12, 28, 1, 8, 22]
bars2 = [28, 7, 16, 4, 10]
bars3 = [25, 3, 23, 25, 17]

# Height reached by layers 1 + 2; the third layer starts from here.
bars = [low + mid for low, mid in zip(bars1, bars2)]

# x positions, group names and bar width.
r = [0,1,2,3,4]
names = ['A','B','C','D','E']
barWidth = 1

# Bottom layer (brown).
plt.bar(r, bars1, color='#7f6d5f', edgecolor='white', width=barWidth)
# Middle layer (green), stacked on the first.
plt.bar(r, bars2, bottom=bars1, color='#557f2d', edgecolor='white', width=barWidth)
# Top layer, stacked on the sum of the first two.
plt.bar(r, bars3, bottom=bars, color='#2d7f5e', edgecolor='white', width=barWidth)

# Group names on the x axis.
plt.xticks(r, names, fontweight='bold')
plt.xlabel("group")

plt.show()

四. 叠加百分比柱形图

# libraries

import numpy as np

import matplotlib.pyplot as plt

from matplotlib import rc

import pandas as pd

# Raw values of the three layers, one entry per group.
r = [0,1,2,3,4]
raw_data = {'greenBars': [20, 1.5, 7, 10, 5], 'orangeBars': [5, 15, 5, 10, 15],'blueBars': [2, 15, 18, 5, 10]}
df = pd.DataFrame(raw_data)

# Convert every raw value into its percentage of the group total.
totals = [i+j+k for i,j,k in zip(df['greenBars'], df['orangeBars'], df['blueBars'])]


def _as_percent(column):
    """Return the column's values as percentages of the per-group totals."""
    return [value / total * 100 for value, total in zip(df[column], totals)]


greenBars = _as_percent('greenBars')
orangeBars = _as_percent('orangeBars')
blueBars = _as_percent('blueBars')

barWidth = 0.85
names = ('A','B','C','D','E')

# Green layer at the bottom.
plt.bar(r, greenBars, color='#b5ffb9', edgecolor='white', width=barWidth)
# Orange layer on top of green.
plt.bar(r, orangeBars, bottom=greenBars, color='#f9bc86', edgecolor='white', width=barWidth)
# Blue layer on top of green + orange.
green_plus_orange = [i+j for i,j in zip(greenBars, orangeBars)]
plt.bar(r, blueBars, bottom=green_plus_orange, color='#a3acff', edgecolor='white', width=barWidth)

# Group names on the x axis.
plt.xticks(r, names)
plt.xlabel("group")

plt.show()

五. 水平直方图

# libraries & dataset

import seaborn as sns

import matplotlib.pyplot as plt

# Grey grid background (sns.set is the older spelling of sns.set_theme).
sns.set(style="darkgrid")

# Iris sample dataset shipped with seaborn.
df = sns.load_dataset("iris")

# Passing the column as y= lays the histogram bars out horizontally.
column_to_plot = "sepal_length"
sns.histplot(data=df, y=column_to_plot)
plt.show()

六. 紫色柱形图

# libraries

import numpy as np

import matplotlib.pyplot as plt

# Category names and bar heights.
bars = ('A', 'B', 'C', 'D', 'E')
height = [3, 12, 5, 18, 45]
x_pos = np.arange(len(bars))

# The colour is an RGBA tuple: red, green, blue, alpha.
bar_rgba = (0.5,0.1,0.5,0.6)
plt.bar(x_pos, height, color = bar_rgba)

# Category names on the x axis.
plt.xticks(x_pos, bars)

# Title and axis names.
plt.title('My title')
plt.xlabel('categories')
plt.ylabel('values')

plt.show()

七.带条纹的条形图

# Libraries

import numpy as np

import matplotlib.pyplot as plt

# Category names and bar heights.
height = [2, 5, 4, 6]
bars = ('A', 'B', 'C', 'D')
x_pos = np.arange(len(bars))

# plt.bar returns a container holding one rectangle per bar.
bar_container = plt.bar(x_pos, height)

# One hatch pattern per bar.
hatches = ['-', '/', '||', '///']

# Pair every rectangle with its pattern. The loop body has to be indented;
# the flattened original was a syntax error.
for bar, hatch in zip(bar_container, hatches):
    bar.set_hatch(hatch)

# Category names on the x axis.
plt.xticks(x_pos, bars)

plt.show()

八、带误差线的分组条形图

# libraries

import numpy as np

import matplotlib.pyplot as plt

# Width of a single bar.
barWidth = 0.3

# Heights of the blue and cyan bars.
bars1 = [10, 9, 2]
bars2 = [10.8, 9.5, 4.5]

# Error-bar half-lengths for the blue and cyan bars.
yer1 = [0.5, 0.4, 0.5]
yer2 = [1, 0.7, 1]

# x positions: cyan bars sit one bar width to the right of the blue ones.
r1 = np.arange(len(bars1))
r2 = [x + barWidth for x in r1]

# Blue bars with error bars (capsize is the width of the error-bar caps).
plt.bar(r1, bars1, width = barWidth, color = 'blue', edgecolor = 'black', yerr=yer1, capsize=7, label='poacee')

# Cyan bars with error bars.
plt.bar(r2, bars2, width = barWidth, color = 'cyan', edgecolor = 'black', yerr=yer2, capsize=7, label='sorgho')

# Centre each condition name between its two bars. The bars are centred at
# r and r + barWidth, so the midpoint is r + barWidth / 2; the previous
# r + barWidth put the tick under the cyan bar only.
plt.xticks([r + barWidth / 2 for r in range(len(bars1))], ['cond_A', 'cond_B', 'cond_C'])
plt.ylabel('height')
plt.legend()

plt.show()

九、详细的条形图

# library

import matplotlib.pyplot as plt

# Bar heights of the three categories; bars4 is all of them, in the same
# order as the positions collected in r4 below.
barWidth = 0.9
bars1 = [3, 3, 1]
bars2 = [4, 2, 3]
bars3 = [4, 6, 7, 10, 4, 4]
bars4 = bars1 + bars2 + bars3

# x positions of the bars of each category, and all positions together.
r1 = [1,5,9]
r2 = [2,6,10]
r3 = [3,4,7,8,11,12]
r4 = r1 + r2 + r3

# One bar series per category, each with its own colour and legend label.
plt.bar(r1, bars1, width = barWidth, color = (0.3,0.1,0.4,0.6), label='Alone')
plt.bar(r2, bars2, width = barWidth, color = (0.3,0.5,0.4,0.6), label='With Himself')
plt.bar(r3, bars3, width = barWidth, color = (0.3,0.9,0.4,0.6), label='With other genotype')

plt.legend()

# Rotated name below every bar. The bars are centred on 1..12, so the ticks
# are placed there; the previous `r + barWidth` (0.9, 1.9, ...) left every
# name slightly off-centre.
plt.xticks(range(1, len(r4) + 1), ['DD', 'with himself', 'with DC', 'with Silur', 'DC', 'with himself', 'with DD', 'with Silur', 'Silur', 'with himself', 'with DD', 'with DC'], rotation=90)

# Sample-size note for every bar, listed in r4 order.
label = ['n = 6', 'n = 25', 'n = 13', 'n = 36', 'n = 30', 'n = 11', 'n = 16', 'n = 37', 'n = 14', 'n = 4', 'n = 31', 'n = 34']

# Write each note just above its bar. The loop body has to be indented; the
# flattened original was a syntax error.
for position, bar_height, note in zip(r4, bars4, label):
    plt.text(x = position-0.5 , y = bar_height+0.1, s = note, size = 6)

# Leave room at the bottom for the rotated names.
plt.subplots_adjust(bottom= 0.2, top = 0.98)

plt.show()

十、色块

# library

import seaborn as sns

import pandas as pd

import numpy as np

# 5 x 5 matrix of uniform random numbers with named columns.
df = pd.DataFrame(np.random.random((5,5)), columns=["a","b","c","d","e"])

# Default heatmap: every cell is coloured by its value.
sns.heatmap(df)

# Run as a script, nothing appears unless the figure is shown explicitly.
import matplotlib.pyplot as plt
plt.show()

十一、带数值的色块

# libraries

import seaborn as sns

import pandas as pd

import numpy as np

# 10 x 10 matrix of uniform random numbers with named columns.
df = pd.DataFrame(np.random.random((10,10)), columns=["a","b","c","d","e","f","g","h","i","j"])

# annot=True writes each value into its cell; annot_kws sets the font size.
sns.heatmap(df, annot=True, annot_kws={"size": 7})

# Run as a script, nothing appears unless the figure is shown explicitly.
import matplotlib.pyplot as plt
plt.show()

十二、散点图

# libraries

import pandas as pd

import numpy as np

import matplotlib.pylab as plt

import seaborn as sns

# 100 random points in the unit square.
df = pd.DataFrame(np.random.random((100,2)), columns=["x","y"])

# Colour column: one colour where x > 0.2 and y > 0.4, another elsewhere.
# Any other boolean rule would work the same way.
value = (df['x'] > 0.2) & (df['y'] > 0.4)
df['color'] = np.where(value, "#9b59b6", "#3498db")

# Scatter only (fit_reg=False turns the regression line off); the marker
# face colours come from the colour column.
marker_style = {'facecolors': df['color']}
sns.regplot(data=df, x="x", y="y", fit_reg=False, scatter_kws=marker_style)

plt.show()

十三、颜色渐变的散点图

# Libraries

import seaborn as sns

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

# 80 random x values centred on zero; y and z are x plus uniform noise.
x = np.random.rand(80) - 0.5
y = x+np.random.rand(80)
z = x+np.random.rand(80)
df = pd.DataFrame({'x':x, 'y':y, 'z':z})

# Same scatter twice: hue='x' colours the points by their x value, first
# with the "Blues" palette and then with its reversed variant "Blues_r".
for palette_name in ("Blues", "Blues_r"):
    sns.lmplot( x='x', y='y', data=df, fit_reg=False, hue='x', legend=False, palette=palette_name)
    plt.show()

十四、不同主题风格的箱线图

# libraries

import seaborn as sns

import numpy as np

import matplotlib.pyplot as plt

# 20 normal samples for each of 6 columns; column k is shifted up by k / 2.
data = np.random.normal(size=(20, 6)) + np.arange(6) / 2

# Draw the same box plot once per seaborn theme, with the theme name as the
# figure title.
for theme in ("whitegrid", "darkgrid", "white", "dark", "ticks"):
    sns.set_style(theme)
    sns.boxplot(data=data)
    plt.title(theme)
    plt.show()

十五、带点折线图

# library and dataset

from matplotlib import pyplot as plt

import pandas as pd

import numpy as np

# x runs from 1 to 100; y follows x plus normal noise (std 15); z is built
# the same way and doubled.
x_values = range(1, 101)
df = pd.DataFrame({
    'x_axis': x_values,
    'y_axis': np.random.randn(100)*15+range(1,101),
    'z': (np.random.randn(100)*15+range(1,101))*2,
})

# Line through the points with a round marker on each one.
plt.plot('x_axis', 'y_axis', data=df, marker='o', color='mediumvioletred')
plt.show()

十六、对比折线图

# Libraries and data

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

# Ten normal random values against x = 1..10.
df = pd.DataFrame({'x_values': range(1,11), 'y_values': np.random.randn(10)})

# Draw the same line twice: first with default opacity, then mostly
# transparent via alpha=0.3.
for extra_style in ({}, {'alpha': 0.3}):
    plt.plot('x_values', 'y_values', data=df, color='skyblue', **extra_style)
    plt.show()

十七、虚线折线图

# Libraries and data

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

# Ten normal random values against x = 1..10.
df = pd.DataFrame({
    'x_values': range(1,11),
    'y_values': np.random.randn(10),
})

# Dashed line ('--' is matplotlib's short form of 'dashed').
plt.plot('x_values', 'y_values', data=df, linestyle='--')
plt.show()

十八、多种颜色的折线图

# libraries

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

# Data frame with a shared x column and ten noisy series y1..y10.
df = pd.DataFrame({
    'x': range(1,11),
    'y1': np.random.randn(10),
    'y2': np.random.randn(10)+range(1,11),
    'y3': np.random.randn(10)+range(11,21),
    'y4': np.random.randn(10)+range(6,16),
    'y5': np.random.randn(10)+range(4,14)+(0,0,0,0,0,0,0,-3,-8,-6),
    'y6': np.random.randn(10)+range(2,12),
    'y7': np.random.randn(10)+range(5,15),
    'y8': np.random.randn(10)+range(4,14),
    'y9': np.random.randn(10)+range(4,14),
    'y10': np.random.randn(10)+range(2,12),
})

# Matplotlib renamed its bundled seaborn styles to 'seaborn-v0_8-*' and later
# dropped the old names; use whichever name this installation provides.
darkgrid_style = 'seaborn-v0_8-darkgrid'
if darkgrid_style not in plt.style.available:
    darkgrid_style = 'seaborn-darkgrid'
plt.style.use(darkgrid_style)

# Colour map the line colours are taken from.
palette = plt.get_cmap('Set1')

# One line per series. The colour index starts at 1, exactly as the original
# counter did; the loop body has to be indented (the flattened original was
# a syntax error).
for num, column in enumerate(df.drop('x', axis=1), start=1):
    plt.plot(df['x'], df[column], marker='', color=palette(num), linewidth=1, alpha=0.9, label=column)

# Legend in the upper left corner (loc=2), laid out in two columns.
plt.legend(loc=2, ncol=2)

# Title and axis names.
plt.title("A (bad) Spaghetti plot", loc='left', fontsize=12, fontweight=0, color='orange')
plt.xlabel("Time")
plt.ylabel("Score")

plt.show()

十九、几种线

# One sample line per built-in line style, each with a caption written in the
# matching colour of the default colour cycle (C0..C3).
samples = [
    ([1,1.1,1,1.1,1], '-', 1.3, 'C0'),
    ([2,2.1,2,2.1,2], '--', 2.3, 'C1'),
    ([3,3.1,3,3.1,3], '-.', 3.3, 'C2'),
    ([4,4.1,4,4.1,4], ':', 4.3, 'C3'),
]
for heights, style, caption_y, caption_color in samples:
    plt.plot(heights, linestyle=style, linewidth=4)
    plt.text(1.5, caption_y, f"linestyle = '{style}' ", horizontalalignment='left', size='medium', color=caption_color, weight='semibold')

# Hide the axes so only the lines and captions remain.
plt.axis('off')
plt.show()

二十、几种折线

# libraries

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

# Three noisy series over x = 1..10, each at a different level.
df = pd.DataFrame({
    'x_values': range(1,11),
    'y1_values': np.random.randn(10),
    'y2_values': np.random.randn(10)+range(1,11),
    'y3_values': np.random.randn(10)+range(11,21),
})

# Series 1: thick sky-blue line with large blue markers.
plt.plot('x_values', 'y1_values', data=df, color='skyblue', linewidth=4, marker='o', markersize=12, markerfacecolor='blue')
# Series 2: thin olive line without markers.
plt.plot('x_values', 'y2_values', data=df, color='olive', linewidth=2, marker='')
# Series 3: dashed olive line with an explicit legend label.
plt.plot('x_values', 'y3_values', data=df, color='olive', linewidth=2, marker='', linestyle='dashed', label="toto")

plt.legend()
plt.show()

二十一、小倍数线图

# libraries

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

# Make a data frame: a shared x column plus nine noisy series (y1..y9), one
# per cell of the 3 x 3 grid. The snippet previously created no data frame
# at all and depended on whatever `df` an earlier snippet had left behind.
series_offsets = [None, range(1, 11), range(11, 21), range(6, 16), range(4, 14),
                  range(2, 12), range(5, 15), range(4, 14), range(4, 14)]
columns = {'x': range(1, 11)}
for index, offset in enumerate(series_offsets, start=1):
    noise = np.random.randn(10)
    columns[f'y{index}'] = noise if offset is None else noise + offset
df = pd.DataFrame(columns)

# Matplotlib renamed its bundled seaborn styles to 'seaborn-v0_8-*' and later
# dropped the old names; use whichever name this installation provides.
darkgrid_style = 'seaborn-v0_8-darkgrid'
if darkgrid_style not in plt.style.available:
    darkgrid_style = 'seaborn-darkgrid'
plt.style.use(darkgrid_style)

# Colour map the line colours are taken from.
palette = plt.get_cmap('Set1')

fig = plt.figure()

# One subplot per series (loop body re-indented; the flattened original was
# a syntax error).
for num, column in enumerate(df.drop('x', axis=1), start=1):
    # Pick cell `num` of the 3 x 3 grid.
    plt.subplot(3, 3, num)

    plt.plot(df['x'], df[column], marker='', color=palette(num), linewidth=1.9, alpha=0.9, label=column)

    # Same limits in every cell so the panels are comparable.
    plt.xlim(0, 10)
    plt.ylim(-2, 22)

    # Tick labels only on the bottom row and the left column. The flags must
    # be booleans: the string 'off' is truthy and hides nothing.
    if num < 7:
        plt.tick_params(labelbottom=False)
    if num not in [1, 4, 7]:
        plt.tick_params(labelleft=False)

    # Panel title in the colour of its line.
    plt.title(column, loc='left', fontsize=12, fontweight=0, color=palette(num))

# Title for the whole figure.
plt.suptitle("How the 9 students improved\nthese past few days?", fontsize=13, fontweight=0, color='black', style='italic', y=1.02)

# Shared axis titles, positioned in figure coordinates. plt.text would put
# them at data coordinates inside the last subplot.
fig.text(0.5, 0.02, 'Time', ha='center', va='center')
fig.text(0.06, 0.5, 'Note', ha='center', va='center', rotation='vertical')

plt.show()

二十二、基本连通散点图

# Libraries

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

# Default figure size for this and later figures.
plt.rcParams["figure.figsize"] = (10, 6)

# Seeded random generator so every run draws the same numbers.
rng = np.random.default_rng(1111)

# Ten x positions; y is a random integer in [10, 100) and z is y plus a
# random integer in [5, 20).
x = np.array(range(10))
y = rng.integers(10, 100, 10)
z = y + rng.integers(5, 20, 10)

# Two connected-marker lines sharing one style.
line_style = {"linestyle": "-", "marker": "o"}
plt.plot(x, z, label="Income", **line_style)
plt.plot(x, y, label="Expenses", **line_style)
plt.legend()
plt.show()

二十三、如何用 Python 避免过度绘制（overplotting）

# libraries

import matplotlib.pyplot as plt

import numpy as np

import seaborn as sns

import pandas as pd

# Dataset: three groups of 20000 normally distributed points each, with
# different centres.
df=pd.DataFrame({'x': np.random.normal(10, 1.2, 20000), 'y': np.random.normal(10, 1.2, 20000), 'group': np.repeat('A',20000) })
tmp1=pd.DataFrame({'x': np.random.normal(14.5, 1.2, 20000), 'y': np.random.normal(14.5, 1.2, 20000), 'group': np.repeat('B',20000) })
tmp2=pd.DataFrame({'x': np.random.normal(9.5, 1.5, 20000), 'y': np.random.normal(15.5, 1.5, 20000), 'group': np.repeat('C',20000) })

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the three
# frames the same way and keeps each frame's own index.
df = pd.concat([df, tmp1, tmp2])

# Markers only (no connecting line): 60000 overlapping points.
plt.plot( 'x', 'y', "", data=df, linestyle='', marker='o')
plt.xlabel('Value of X')
plt.ylabel('Value of Y')
plt.title('Overplotting looks like that:', loc='left')
plt.show()

二十四、基本甜甜圈

# library

import matplotlib.pyplot as plt

# Wedge sizes.
size_of_groups=[12,11,3,30]

# Start from an ordinary pie chart ...
plt.pie(size_of_groups)

# ... and cover its centre with a white disc of radius 0.7, which leaves a
# ring.
my_circle = plt.Circle((0,0), 0.7, color='white')
p = plt.gcf()
current_axes = p.gca()
current_axes.add_artist(my_circle)

plt.show()

二十五、自定义甜甜圈

# library

import matplotlib.pyplot as plt

# Wedge labels and sizes.
names = ['groupA', 'groupB', 'groupC', 'groupD']
size = [12,11,3,30]

# One named colour per wedge.
wedge_colors = ['red','green','blue','skyblue']
plt.pie(size, labels=names, colors=wedge_colors)

# White disc of radius 0.7 over the centre turns the pie into a ring.
my_circle = plt.Circle((0,0), 0.7, color='white')
p = plt.gcf()
p.gca().add_artist(my_circle)

plt.show()

# library

import matplotlib.pyplot as plt

# Wedge labels and sizes.
names = ['groupA', 'groupB', 'groupC', 'groupD']
size = [12,11,3,30]

# Fewer colours than wedges: the colour list is cycled.
wedge_colors = ['red','green']
plt.pie(size, labels=names, colors=wedge_colors)

# White disc of radius 0.7 over the centre turns the pie into a ring.
my_circle = plt.Circle((0,0), 0.7, color='white')
p = plt.gcf()
p.gca().add_artist(my_circle)

plt.show()

二十六、改变背景的甜甜圈

# library

import matplotlib.pyplot as plt

# Wedge labels and sizes.
names = 'groupA', 'groupB', 'groupC', 'groupD',
size = [12,11,3,30]

# New figure with a black background.
fig = plt.figure()
fig.patch.set_facecolor('black')

# Black disc for the centre of the ring.
my_circle=plt.Circle( (0,0), 0.7, color='black')

# White labels for this pie only. The previous code set
# plt.rcParams['text.color'] globally, which also turned the text of every
# later figure white (invisible on their white backgrounds).
plt.pie(size, labels=names, textprops={'color': 'white'})
p=plt.gcf()
p.gca().add_artist(my_circle)
plt.show()

二十七、分组甜甜圈

# Libraries

import matplotlib.pyplot as plt

# Data: 3 groups split into 10 subgroups (3 + 2 + 5); the subgroup sizes of
# each group add up to the group size.
group_names=['groupA', 'groupB', 'groupC']
group_size=[12,11,30]
subgroup_names=['A.1', 'A.2', 'A.3', 'B.1', 'B.2', 'C.1', 'C.2', 'C.3', 'C.4', 'C.5']
subgroup_size=[4,3,5,6,5,10,5,5,4,6]

# One colour map per group; shades are picked by calling it with 0..1.
a, b, c=[plt.cm.Blues, plt.cm.Reds, plt.cm.Greens]

fig, ax = plt.subplots()
ax.axis('equal')

# Outer ring: the groups. width=0.3 keeps only the outer part of the wedges.
outer_colors = [a(0.6), b(0.6), c(0.6)]
mypie, _ = ax.pie(group_size, radius=1.3, labels=group_names, colors=outer_colors )
plt.setp( mypie, width=0.3, edgecolor='white')

# Inner ring: the subgroups, in shades of their group's colour map, starting
# where the outer ring ends.
inner_colors = [a(0.5), a(0.4), a(0.3), b(0.5), b(0.4), c(0.6), c(0.5), c(0.4), c(0.3), c(0.2)]
mypie2, _ = ax.pie(subgroup_size, radius=1.3-0.3, labels=subgroup_names, labeldistance=0.7, colors=inner_colors)
plt.setp( mypie2, width=0.4, edgecolor='white')
plt.margins(0,0)

plt.show()

二十八、棒棒糖图

# libraries

import matplotlib.pyplot as plt

import numpy as np

# 40 uniform random values against x = 1..40.
x = range(1,41)
values = np.random.uniform(size=40)

# Stem plot with explicit x positions and a fixed y range.
plt.stem(x, values)
plt.ylim(0, 1.2)
plt.show()

# Same values without x: stem numbers the positions itself.
plt.stem(values)
plt.show()

二十九、自定义棒棒糖图

# libraries

import matplotlib.pyplot as plt

import numpy as np

# 40 uniform random values.
values = np.random.uniform(size=40)

# Stems only: a blank marker format hides the heads.
plt.stem(values, markerfmt=' ')
plt.show()

# stem returns its three parts; restyle the markers as large diamonds with an
# orange edge.
(markers, stemlines, baseline) = plt.stem(values)
marker_style = dict(marker='D', markersize=10, markeredgecolor="orange", markeredgewidth=2)
plt.setp(markers, **marker_style)
plt.show()

三十、垂直棒棒糖图

# libraries

import pandas as pd

import matplotlib.pyplot as plt

import numpy as np

# 20 groups named 'A'..'T' (chr(65)..chr(84)) with a uniform random value.
df = pd.DataFrame({'group':list(map(chr, range(65, 85))), 'values':np.random.uniform(size=20) })

# Sort by value; my_range holds the row positions 1..20 used on the y axis.
ordered_df = df.sort_values(by='values')
my_range = range(1,len(df.index)+1)

# One horizontal stem per group from 0 to its value, with a dot at the end.
plt.hlines(y=my_range, xmin=0, xmax=ordered_df['values'], color='skyblue')
plt.plot(ordered_df['values'], my_range, "o")

# Group names on the y axis, plus title and axis names.
plt.yticks(my_range, ordered_df['group'])
plt.xlabel('Value of the variable')
plt.ylabel('Group')
plt.title("A vertical lolipop plot", loc='left')

plt.show()

三十一、高光棒棒糖图

# libraries

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

# 20 groups named 'A'..'T' with a uniform random value each.
df = pd.DataFrame({'group':list(map(chr, range(65, 85))), 'values':np.random.uniform(size=20) })

# Sort by value; my_range holds the row positions 1..20 used on the y axis.
ordered_df = df.sort_values(by='values')
my_range = range(1,len(df.index)+1)

# Group "B" is highlighted with its own colour and a larger dot.
is_b = ordered_df ['group']=='B'
my_color = np.where(is_b, 'orange', 'skyblue')
my_size = np.where(is_b, 70, 30)

# Faint horizontal stems, opaque dots at the values.
plt.hlines(y=my_range, xmin=0, xmax=ordered_df['values'], color=my_color, alpha=0.4)
plt.scatter(ordered_df['values'], my_range, color=my_color, s=my_size, alpha=1)

# Group names on the y axis, plus title and axis names.
plt.yticks(my_range, ordered_df['group'])
plt.xlabel('Value of the variable')
plt.ylabel('Group')
plt.title("What about the B group?", loc='left')

plt.show()

三十二、分组棒棒糖图

# libraries

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

# Two values per group: value2 is value1 plus a smaller uniform increment.
value1 = np.random.uniform(size=20)
value2 = value1+np.random.uniform(size=20)/4
df = pd.DataFrame({'group':list(map(chr, range(65, 85))), 'value1':value1 , 'value2':value2 })

# Sort by the first value; my_range holds the row positions 1..20.
ordered_df = df.sort_values(by='value1')
my_range = range(1,len(df.index)+1)

# Grey segment between the two values of every group.
plt.hlines(y=my_range, xmin=ordered_df['value1'], xmax=ordered_df['value2'], color='grey', alpha=0.4)

# One dot series per value column.
for column, dot_color, dot_alpha in (('value1', 'skyblue', 1), ('value2', 'green', 0.4)):
    plt.scatter(ordered_df[column], my_range, color=dot_color, alpha=dot_alpha, label=column)
plt.legend()

# Group names on the y axis, plus title and axis names.
plt.yticks(my_range, ordered_df['group'])
plt.xlabel('Value of the variables')
plt.ylabel('Group')
plt.title("Comparison of the value 1 and the value 2", loc='left')

plt.show()

三十三、带条件色彩的棒棒糖图

#%%

# libraries

import matplotlib.pyplot as plt

import numpy as np

import seaborn as sns

# 100 points of a sine wave over [0, 2*pi] with uniform noise, shifted down
# by 0.2.
x = np.linspace(0, 2*np.pi, 100)
y = np.sin(x) + np.random.uniform(size=len(x)) - 0.2

# Orange where y is zero or above, sky blue below.
non_negative = y >= 0
my_color = np.where(non_negative, 'orange', 'skyblue')

# Faint vertical stems from 0 to y, small opaque dots at the values.
plt.vlines(x=x, ymin=0, ymax=y, color=my_color, alpha=0.4)
plt.scatter(x, y, color=my_color, s=1, alpha=1)

# Title and axis names.
plt.xlabel('Value of the variable')
plt.ylabel('Group')
plt.title("Evolution of the value of ...", loc='left')

plt.show()

三十四、面积图

import numpy as np

import matplotlib.pyplot as plt

# Five y values against x = 1..5.
x = range(1,6)
y = [1,4,6,8,4]

# Fill the area between the curve and zero.
plt.fill_between(x, y)
plt.show()

# plt.stackplot(x, y) would draw a similar chart; fill_between is kept here
# because it is easier to customise afterwards.

三十五、改善区域图

# libraries

import numpy as np

import matplotlib.pyplot as plt

# Fourteen y values against x = 1..14.
x = range(1,15)
y = [1,4,6,8,4,5,3,2,4,1,5,6,8,7]

# Area in a custom colour, partly transparent.
plt.fill_between(x, y, alpha=0.4, color="skyblue")
plt.show()

# Same area, fainter, with a stronger line drawn along its upper edge.
plt.fill_between(x, y, alpha=0.2, color="skyblue")
plt.plot(x, y, alpha=0.6, color="Slateblue")
plt.show()

三十六、区域图表和分面

# libraries

import numpy as np

import seaborn as sns

import pandas as pd

import matplotlib.pyplot as plt

# Long-format dataset: 16 countries x 10 years (2000..2009), with one random
# value per row.
my_count=["France","Australia","Japan","USA","Germany","Congo","China","England","Spain","Greece","Marocco","South Africa","Indonesia","Peru","Chili","Brazil"]
df = pd.DataFrame({
    "country": np.repeat(my_count, 10),
    "years": list(range(2000, 2010)) * 16,
    "value": np.random.rand(160),
})

# One facet per country, four facets per row, each in its own colour.
grid = sns.FacetGrid(df, col='country', hue='country', col_wrap=4, )

# Line first, then the translucent area below it.
grid = grid.map(plt.plot, 'years', 'value')
grid = grid.map(plt.fill_between, 'years', 'value', alpha=0.2).set_titles("{col_name} country")

# Final facet titles: the plain country name (replaces the titles set on the
# previous line).
grid = grid.set_titles("{col_name}")

# Make room at the top, then add a title for the whole figure.
plt.subplots_adjust(top=0.92)
g = grid.fig.suptitle('Evolution of the value of stuff in 16 countries')

plt.show()

三十七、白色网格区域图

# libraries

import numpy as np

import seaborn as sns

import matplotlib.pyplot as plt

# White background with grid lines.
sns.set_style("whitegrid")

# First colour of seaborn's "muted" palette.
blue = sns.color_palette("muted", 1)[0]

# 23 random integers in [8, 20) against x = 0..22.
x = np.arange(23)
y = np.random.randint(8, 20, 23)

# Thick line plus a translucent area between 0 and the line.
fig, ax = plt.subplots()
ax.plot(x, y, color=blue, lw=3)
ax.fill_between(x, 0, y, alpha=.3)

# x axis spans exactly the data with a tick on every point; y starts at 0.
ax.set(xlim=(0, len(x) - 1), ylim=(0, None), xticks=x)

plt.show()

三十八、基本堆叠区域图

# libraries

import numpy as np

import matplotlib.pyplot as plt

layer_names = ['A','B','C']

# --- FORMAT 1: all layers in one nested list ---
x = range(1,6)
y = [ [1,4,6,8,9], [2,2,7,10,12], [2,8,5,10,6] ]

plt.stackplot(x, y, labels=layer_names)
plt.legend(loc='upper left')
plt.show()

# --- FORMAT 2: each layer as its own argument ---
x = range(1,6)
y1 = [1,4,6,8,9]
y2 = [2,2,7,10,12]
y3 = [2,8,5,10,6]

plt.stackplot(x, y1, y2, y3, labels=layer_names)
plt.legend(loc='upper left')
plt.show()

三十九、海洋风格的堆叠区域图

# libraries

import numpy as np

import matplotlib.pyplot as plt

import seaborn as sns

# Apply seaborn's default theme to the matplotlib figures that follow.
sns.set_theme()

# Three layers of five values each against x = 1..5.
x = range(1,6)
y = [ [1,4,6,8,9], [2,2,7,10,12], [2,8,5,10,6] ]

# Stacked area chart with one legend entry per layer.
layer_names = ['A','B','C']
plt.stackplot(x, y, labels=layer_names)
plt.legend(loc='upper left')
plt.show()

四十、基线选项堆叠面积图

# libraries

import numpy as np

import matplotlib.pyplot as plt

import seaborn as sns

# Five layers over x = 0..9: each layer is x plus uniform noise in [0, 5).
X = np.arange(0, 10, 1)
Y = X + 5 * np.random.random((5, X.size))

# The four baseline modes used below.
baseline = ["zero", "sym", "wiggle", "weighted_wiggle"]

# One subplot per baseline in a 2 x 2 grid (loop body re-indented; the
# flattened original was a syntax error).
for n, v in enumerate(baseline):
    plt.subplot(2, 2, n + 1)
    plt.stackplot(X, *Y, baseline=v)
    plt.title(v)

    # Hide the x tick labels on the top row only. This has to run after the
    # subplot exists, and the flag must be a boolean: the string 'off' is
    # truthy and hides nothing.
    if n < 2:
        plt.tick_params(labelbottom=False)

plt.tight_layout()

# Run as a script, nothing appears unless the figure is shown explicitly.
plt.show()

01. 小提琴图

小提琴图可以将一组或多组数据的数值变量分布可视化。

与有时会隐藏数据特征的箱形图相比，小提琴图值得更多关注。

import seaborn as sns

import matplotlib.pyplot as plt

# Load the iris sample data (kept in the local 'seaborn-data' folder).
df = sns.load_dataset('iris', data_home='seaborn-data', cache=True)

# One violin per species showing the distribution of sepal length.
species, sepal_length = df["species"], df["sepal_length"]
sns.violinplot(x=species, y=sepal_length)
plt.show()

02. 核密度估计图

核密度估计图其实是对直方图的一个自然拓展。

可以可视化一个或多个组的数值变量的分布，非常适合大型数据集。

import seaborn as sns

import matplotlib.pyplot as plt

# Load the iris sample data (kept in the local 'seaborn-data' folder).
df = sns.load_dataset('iris', data_home='seaborn-data', cache=True)

# Kernel density estimate of the sepal width column.
sepal_width = df['sepal_width']
sns.kdeplot(sepal_width)
plt.show()

03. 直方图

直方图，可视化一组或多组数据的分布情况。

import seaborn as sns

import matplotlib.pyplot as plt

# Load the iris sample data set (cached in the local 'seaborn-data' folder).
df = sns.load_dataset('iris', data_home='seaborn-data', cache=True)

# Plot a plain histogram of sepal length (bars only, no KDE curve).
# sns.distplot is deprecated; sns.histplot is its replacement for this use.
sns.histplot(x=df["sepal_length"], kde=False)

plt.show()

04. 箱形图

箱形图，可视化一组或多组数据的分布情况。

可以快速获得中位数、四分位数和异常值，但也隐藏数据集的各个数据点。

import seaborn as sns

import matplotlib.pyplot as plt

# 加载数据

df = sns.load_dataset('iris', data_home='seaborn-data', cache=True)

# 绘图显示

sns.boxplot(x=df["species"], y=df["sepal_length"])

plt.show()

06. 散点图

散点图，显示2个数值变量之间的关系。

import seaborn as sns

import matplotlib.pyplot as plt

# 加载数据

df = sns.load_dataset('iris', data_home='seaborn-data', cache=True)

# 绘图显示

sns.regplot(x=df["sepal_length"], y=df["sepal_width"])

plt.show()

08. 相关性图

相关性图或相关矩阵图，分析每对数据变量之间的关系。

相关性可视化为散点图，对角线用直方图或密度图表示每个变量的分布。

import seaborn as sns

import matplotlib.pyplot as plt

# 加载数据

df = sns.load_dataset('iris', data_home='seaborn-data', cache=True)

# 绘图显示

sns.pairplot(df)

plt.show()

10. 连接散点图

连接散点图就是一个线图，其中每个数据点由圆形或任何类型的标记展示。

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

# 创建数据

df = pd.DataFrame({'x_axis': range(1, 10), 'y_axis': np.random.randn(9) * 80 + range(1, 10)})

# 绘制显示

plt.plot('x_axis', 'y_axis', data=df, linestyle='-', marker='o')

plt.show()

11. 二维密度图

二维密度图或二维直方图，可视化两个定量变量的组合分布。

它们总是在X轴上表示一个变量，另一个在Y轴上，就像散点图。

然后计算二维空间特定区域内的次数，并用颜色渐变表示。

形状变化：六边形a hexbin chart，正方形a 2d histogram，核密度2d density plots或contour plots。

import numpy as np
import matplotlib.pyplot as plt
# Import gaussian_kde from scipy.stats directly: the scipy.stats.kde
# submodule path used originally is a deprecated private namespace.
from scipy.stats import gaussian_kde

# Create the data: 200 points drawn from a correlated 2-D normal distribution.
data = np.random.multivariate_normal([0, 0], [[1, 0.5], [0.5, 3]], 200)
x, y = data.T

# Create the canvas with six side-by-side subplots.
fig, axes = plt.subplots(ncols=6, nrows=1, figsize=(21, 5))

# Panel 1: raw scatter plot.
axes[0].set_title('Scatterplot')
axes[0].plot(x, y, 'ko')

# Panel 2: hexagonal binning.
nbins = 20
axes[1].set_title('Hexbin')
axes[1].hexbin(x, y, gridsize=nbins, cmap=plt.cm.BuGn_r)

# Panel 3: 2-D histogram (square bins).
axes[2].set_title('2D Histogram')
axes[2].hist2d(x, y, bins=nbins, cmap=plt.cm.BuGn_r)

# Fit a Gaussian kernel density estimate and evaluate it on a regular
# nbins x nbins grid spanning the data range.
k = gaussian_kde(data.T)
xi, yi = np.mgrid[x.min():x.max():nbins * 1j, y.min():y.max():nbins * 1j]
zi = k(np.vstack([xi.flatten(), yi.flatten()]))

# Panel 4: density drawn as a flat colour mesh.
axes[3].set_title('Calculate Gaussian KDE')
axes[3].pcolormesh(xi, yi, zi.reshape(xi.shape), shading='auto', cmap=plt.cm.BuGn_r)

# Panel 5: same density with Gouraud (interpolated) shading.
axes[4].set_title('2D Density with shading')
axes[4].pcolormesh(xi, yi, zi.reshape(xi.shape), shading='gouraud', cmap=plt.cm.BuGn_r)

# Panel 6: shaded density with contour lines on top.
axes[5].set_title('Contour')
axes[5].pcolormesh(xi, yi, zi.reshape(xi.shape), shading='gouraud', cmap=plt.cm.BuGn_r)
axes[5].contour(xi, yi, zi.reshape(xi.shape))

plt.show()

12. 条形图

条形图表示多个明确的变量的数值关系。每个变量都为一个条形。条形的大小代表其数值。

import numpy as np

import matplotlib.pyplot as plt

# 生成随机数据

height = [3, 12, 5, 18, 45]

bars = ('A', 'B', 'C', 'D', 'E')

y_pos = np.arange(len(bars))

# 创建条形图

plt.bar(y_pos, height)

# x轴标签

plt.xticks(y_pos, bars)

# 显示

plt.show()

13. 雷达图

雷达图，可以可视化多个定量变量的一个或多个系列的值。

每个变量都有自己的轴，所有轴都连接在图形的中心。

import matplotlib.pyplot as plt

import pandas as pd

from math import pi

# 设置数据

df = pd.DataFrame({

'group': ['A', 'B', 'C', 'D'],

'var1': [38, 1.5, 30, 4],

'var2': [29, 10, 9, 34],

'var3': [8, 39, 23, 24],

'var4': [7, 31, 33, 14],

'var5': [28, 15, 32, 14]

})

# 目标数量

categories = list(df)[1:]

N = len(categories)

# 角度

angles = [n / float(N) * 2 * pi for n in range(N)]

angles += angles[:1]

# 初始化

ax = plt.subplot(111, polar=True)

# 设置第一处

ax.set_theta_offset(pi / 2)

ax.set_theta_direction(-1)

# 添加背景信息

plt.xticks(angles[:-1], categories)

ax.set_rlabel_position(0)

plt.yticks([10, 20, 30], ["10", "20", "30"], color="grey", size=7)

plt.ylim(0, 40)

# 添加数据图

# 第一个

values = df.loc[0].drop('group').values.flatten().tolist()

values += values[:1]

ax.plot(angles, values, linewidth=1, linestyle='solid', label="group A")

ax.fill(angles, values, 'b', alpha=0.1)

# 第二个

values = df.loc[1].drop('group').values.flatten().tolist()

values += values[:1]

ax.plot(angles, values, linewidth=1, linestyle='solid', label="group B")

ax.fill(angles, values, 'r', alpha=0.1)

# 添加图例

plt.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))

# 显示

plt.show()

14. 词云图

词云图是文本数据的视觉表示。

单词通常是单个的，每个单词的重要性以字体大小或颜色表示。

from wordcloud import WordCloud

import matplotlib.pyplot as plt

# 添加词语

text=("Python Python Python Matplotlib Chart Wordcloud Boxplot")

# 创建词云对象

wordcloud = WordCloud(width=480, height=480, margin=0).generate(text)

# 显示词云图

plt.imshow(wordcloud, interpolation='bilinear')

plt.axis("off")

plt.margins(x=0, y=0)

plt.show()

15. 平行坐标图

一个平行坐标图，能够比较不同系列相同属性的数值情况。

Pandas可能是绘制平行坐标图的最佳方式。

import seaborn as sns

import matplotlib.pyplot as plt

from pandas.plotting import parallel_coordinates

# 读取数据

data = sns.load_dataset('iris', data_home='seaborn-data', cache=True)

# 创建图表

parallel_coordinates(data, 'species', colormap=plt.get_cmap("Set2"))

# 显示

plt.show()

17. 径向柱图

径向柱图同样也是条形图的变形，但是使用极坐标而不是直角坐标系。

绘制起来有点麻烦，而且比柱状图准确度低，但更引人注目。

import pandas as pd

import matplotlib.pyplot as plt

import numpy as np

# Build the data: 50 named items with random integer values in [10, 100).
df = pd.DataFrame(
    {
        'Name': ['item ' + str(i) for i in range(1, 51)],
        'Value': np.random.randint(low=10, high=100, size=50)
    })

# Sort ascending so the bars grow steadily around the circle.
df = df.sort_values(by=['Value'])

# Initialise the canvas with a single polar axes and hide the axis frame.
plt.figure(figsize=(20, 10))
ax = plt.subplot(111, polar=True)
plt.axis('off')

# Chart parameters.
upperLimit = 100
lowerLimit = 30   # radius at which every bar starts (inner hole)
labelPadding = 4  # gap between the end of a bar and its label

# Largest value in the data (named max_value so the builtin max() is not shadowed).
max_value = df['Value'].max()

# Linear rescale: a value of 0 maps to lowerLimit and the largest value maps
# to itself, so short bars stay visible.
slope = (max_value - lowerLimit) / max_value
heights = slope * df.Value + lowerLimit

# Each bar gets an equal angular slice of the full circle.
width = 2 * np.pi / len(df.index)

# Angular position of each bar (bar i sits at i * width).
indexes = list(range(1, len(df.index) + 1))
angles = [element * width for element in indexes]

# Draw the bars.
bars = ax.bar(
    x=angles,
    height=heights,
    width=width,
    bottom=lowerLimit,
    linewidth=2,
    edgecolor="white",
    color="#61a4b2",
)

# Add one label per bar, rotated to follow the bar's direction.
for bar, angle, label in zip(bars, angles, df["Name"]):
    rotation = np.rad2deg(angle)

    # On the left half of the circle, flip the text by 180 degrees and
    # right-align it so it is not drawn upside down.
    if np.pi / 2 <= angle < 3 * np.pi / 2:
        alignment = "right"
        rotation = rotation + 180
    else:
        alignment = "left"

    # Place the label just beyond the outer end of the bar.
    ax.text(
        x=angle,
        y=lowerLimit + bar.get_height() + labelPadding,
        s=label,
        ha=alignment,
        va='center',
        rotation=rotation,
        rotation_mode="anchor")

plt.show()

19. 维恩图

维恩图，显示不同组之间所有可能的关系。

import matplotlib.pyplot as plt

from matplotlib_venn import venn2

# 创建图表

venn2(subsets=(10, 5, 2), set_labels=('Group A', 'Group B'))

# 显示

plt.show()

20. 圆环图

圆环图，本质上就是一个饼图，中间切掉了一个区域。

import matplotlib.pyplot as plt

# 创建数据

size_of_groups = [12, 11, 3, 30]

# 生成饼图

plt.pie(size_of_groups)

# 在中心添加一个圆, 生成环形图

my_circle = plt.Circle((0, 0), 0.7, color='white')

p = plt.gcf()

p.gca().add_artist(my_circle)

plt.show()

21. 饼图

饼图，最常见的可视化图表之一。

将圆划分成一个个扇形区域，每个区域代表在整体中所占的比例。

import matplotlib.pyplot as plt

# 创建数据

size_of_groups = [12, 11, 3, 30]

# 生成饼图

plt.pie(size_of_groups)

plt.show()

22. 树图

树图主要用来可视化树形数据结构，是一种特殊的层次类型，具有唯一的根节点，左子树，和右子树。

import pandas as pd

from matplotlib import pyplot as plt

from scipy.cluster.hierarchy import dendrogram, linkage

# 读取数据

df = pd.read_csv('mtcars.csv')

df = df.set_index('model')

# 计算每个样本之间的距离

Z = linkage(df, 'ward')

# 绘图

dendrogram(Z, leaf_rotation=90, leaf_font_size=8, labels=df.index)

# 显示

plt.show()

25. 面积图

面积图和折线图非常相似，区别在于和x坐标轴间是否被颜色填充。

import matplotlib.pyplot as plt

# 创建数据

x = range(1, 6)

y = [1, 4, 6, 8, 4]

# 生成图表

plt.fill_between(x, y)

plt.show()

使用Matplotlib的fill_between()进行绘制，结果如下。

b5d3cdba052058d381dcdcd7c28cb680.png

26. 堆叠面积图

堆叠面积图表示若干个数值变量的数值演变。

每个显示在彼此的顶部，易于读取总数，但较难准确读取每个的值。

import matplotlib.pyplot as plt

# 创建数据

x = range(1, 6)

y1 = [1, 4, 6, 8, 9]

y2 = [2, 2, 7, 10, 12]

y3 = [2, 8, 5, 10, 6]

# 生成图表

plt.stackplot(x, y1, y2, y3, labels=['A', 'B', 'C'])

plt.legend(loc='upper left')

plt.show()

27. 河流图

河流图是一种特殊的流图, 它主要用来表示事件或主题等在一段时间内的变化。

围绕着中心轴显示，且边缘是圆形的，从而形成流动的形状。

import matplotlib.pyplot as plt

import numpy as np

from scipy import stats

# 添加数据

x = np.arange(1990, 2020)

y = [np.random.randint(0, 5, size=30) for _ in range(5)]

def gaussian_smooth(x, y, grid, sd):
    """Smooth the samples ``y`` taken at positions ``x`` onto ``grid``.

    A Gaussian kernel with standard deviation ``sd`` is centred on every
    position in ``x`` and evaluated at every point of ``grid``. Each kernel
    is normalised to sum to 1 over the grid, scaled by its ``y`` value, and
    the scaled kernels are added together.

    Args:
        x: 1-D sequence of sample positions.
        y: 1-D array of sample values, one per element of ``x``.
        grid: 1-D array of positions at which the smoothed curve is evaluated.
        sd: Standard deviation of the Gaussian kernel.

    Returns:
        1-D NumPy array holding one smoothed value per element of ``grid``.
    """
    # weights[i, j] is the kernel centred on x[j], evaluated at grid[i].
    weights = np.transpose([stats.norm.pdf(grid, m, sd) for m in x])
    # Normalise every kernel (column) so that it sums to 1 over the grid.
    weights = weights / weights.sum(0)
    return (weights * y).sum(1)

# 自定义颜色

COLORS = ["#D0D1E6", "#A6BDDB", "#74A9CF", "#2B8CBE", "#045A8D"]

# 创建画布

fig, ax = plt.subplots(figsize=(10, 7))

# 生成图表

grid = np.linspace(1985, 2025, num=500)

y_smoothed = [gaussian_smooth(x, y_, grid, 1) for y_ in y]

ax.stackplot(grid, y_smoothed, colors=COLORS, baseline="sym")

# 显示

plt.show()

28. 时间序列图

时间序列图是指能够展示数值演变的所有图表。

比如折线图、柱状图、面积图等等。

import numpy as np

import seaborn as sns

import pandas as pd

import matplotlib.pyplot as plt

# 创建数据

my_count = ["France", "Australia", "Japan", "USA", "Germany", "Congo", "China", "England", "Spain", "Greece", "Marocco",

"South Africa", "Indonesia", "Peru", "Chili", "Brazil"]

df = pd.DataFrame({

"country": np.repeat(my_count, 10),

"years": list(range(2000, 2010)) * 16,

"value": np.random.rand(160)

})

# Build the facet grid: one panel per country, four panels per row.
g = sns.FacetGrid(df, col='country', hue='country', col_wrap=4)

# Draw the value over the years as a line in every panel.
g = g.map(plt.plot, 'years', 'value')

# Fill the area under each line and give every panel its country title.
g = g.map(plt.fill_between, 'years', 'value', alpha=0.2).set_titles("{col_name} country")

# Overall title: leave some room above the top row so the title does not
# overlap the panel titles.
plt.subplots_adjust(top=0.92)
plt.suptitle('Evolution of the value in 16 countries')

# Display the figure.
plt.show()

import pandas as pd

import matplotlib.pyplot as plt

data=pd.read_csv('SecondhandHouse_view.csv')

plt.rcParams['font.sans-serif'] = 'SimHei'#让标签可以以中文形式呈现

plt.rcParams['axes.unicode_minus']=False

#任务1：散点图

plt.scatter(data['建筑面积'], data['总价'])#需要的列表数据

plt.xlabel('建筑面积（平米）')

plt.ylabel('总价（万）')

plt.title('二手房建筑面积与房价的关系分析')

plt.savefig('二手房建筑面积与房价的关系分析.png')

plt.show()

#折线图

plt.rcParams['font.sans-serif'] = 'SimHei'

plt.plot(data['建筑面积'], data['总价'])

plt.xlabel('建筑面积（平米）')

plt.ylabel('总价（万）')

plt.title('二手房建筑面积与房价的关系分析')

plt.savefig('二手房建筑面积与房价的关系分析折线图1.png')

plt.show()

#饼状图

plt.rcParams['font.sans-serif'] = 'SimHei'

plt.pie(data['总价'], labels=data['建筑面积'], autopct='%1.1f%%')

plt.title('二手房建筑面积与房价的关系分析')

plt.savefig('二手房建筑面积与房价的关系分析饼状图2.png')

plt.show()

#箱线图

plt.rcParams['font.sans-serif'] = 'SimHei'

plt.boxplot(data['总价'], labels=['总价'])

plt.xlabel('建筑面积（平米）')

plt.ylabel('总价（万）')

plt.title('二手房建筑面积与房价的关系分析')

plt.savefig('二手房建筑面积与房价的关系分析箱装图3.png')

plt.show()

# Task 2: bin the '总价' column into ten grades using the given price
# intervals, then draw a bar chart of how many listings fall in each grade.
price_edges = [0, 50, 65, 80, 95, 110, 125, 140, 155, 170, float('inf')]
data['等级'] = pd.cut(data['总价'], price_edges, labels=list(range(1, 11)))

# Count the listings per grade, ordered by grade.
grade_counts = data['等级'].value_counts().sort_index()

# A Series bar plot takes no x/y arguments: the index supplies the
# categories and the values supply the bar heights.
grade_counts.plot(kind='bar')

# Bars are drawn at positions 0..9, so the ticks must be placed there and
# labelled with the grade numbers. Ticks at 1..10 would shift every label
# one bar to the right.
plt.xticks(range(len(grade_counts)), grade_counts.index, rotation=45)

plt.xlabel('等级')
plt.ylabel('二手房出售数量')
plt.savefig('柱状图.png')
plt.show()

# 任务3：绘制不同区域二手房平均房价与出售数量的组合图

# 统计不同区域的二手房总价的平均值和二手房出售数量；二手房平均价使用柱状图，

# 二手房出售数量使用折线图，其中x轴数值为二手房区域；由于二手房平均价与出售数量的数值差距过大，

# 设定双y轴与对应的轴标签。

# 统计不同区域的二手房总价的平均值和二手房出售数量

average_prices = data.groupby('区域')['总价'].mean()

sale_quantities = data.groupby('区域')['总价'].count()

from matplotlib.font_manager import FontProperties

# 创建画布和子图

fig, ax1 = plt.subplots()

plt.rcParams['font.sans-serif']='SimHei' #设置中文显示

plt.rcParams['axes.unicode_minus']=False

# 绘制柱状图（二手房平均价）

ax1.bar(average_prices.index, average_prices, alpha=0.5)

ax1.set_ylabel('二手房平均价', color='blue')

ax1.tick_params(axis='y', labelcolor='blue')

# 创建第二个坐标轴共享同一个x轴

ax2 = ax1.twinx()

# 绘制折线图（二手房出售数量）

ax2.plot(sale_quantities.index, sale_quantities, color='red', marker='o')

ax2.set_ylabel('二手房出售数量', color='red')

ax2.tick_params(axis='y', labelcolor='red')

# 设置x轴标签为二手房区域

plt.xticks(range(len(average_prices)), average_prices.index)

# 添加图例

ax1.legend(['平均价'], loc='upper left')

ax2.legend(['出售数量'], loc='upper right')

# 显示图形

plt.show()

import numpy as np

import matplotlib.pyplot as plt

# NOTE: allow_pickle=True lets np.load unpickle arbitrary Python objects,
# which can execute code; only use it on files from a trusted source.
# The context manager closes the underlying .npz archive once the arrays
# have been read.
with np.load('国民经济核算季度数据.npz', allow_pickle=True) as data:
    columns = data['columns']  # the 'columns' array stored in the archive
    values = data['values']    # the 'values' array stored in the archive

print(columns)
print(values)
print(values.shape)  # dimensions of the 'values' array

import matplotlib.pyplot as plt

import matplotlib as mpl

import random

"""

常见的可视化形式：

1，统计图：直方图、折线图、饼图

2，分布图：热力图、散点图、气泡图

数据可视化工具：

1，分析工具：pandas，SciPy , numpy , sklearn

2，绘图工具：matplotlib, Pychart, reportlab

3，平台工具：Jupyter Notebook, PyCharm

"""

x = [1, 2]

y = [-3, 4]

plt.rcParams['font.sans-serif'] = ['SimHei'] # 显示中文

plt.rcParams['axes.unicode_minus'] = False

plt.title('柱状图')

plt.bar(x, y)

plt.show()

#plt.rcParams['lines.linewidth'] = 10

#plt.rcParams['lines.linestyle'] = '--'#虚线

plt.title('虚线图')

plt.plot(x, y)

plt.show()

# Height samples.
height1 = [168, 155, 160, 143, 170, 160, 193, 170, 190, 160, 143, 170, 160, 193, 170, 190]

# Bin edges every 5 units starting at 110. The upper end is derived from the
# data so the last bin always contains the tallest sample; a fixed upper
# edge of 190 silently dropped the two 193 values.
bins1 = range(110, max(height1) + 5, 5)

plt.title('直方图')

# Draw the histogram.
plt.hist(height1, bins=bins1)

plt.show()

# 数据

classes = ['c1', 'c2', 'c3']

score = [70, 90, 88]

#图形配置

plt.title('条形图') #标题

plt.xlabel('班级')

plt.ylabel('成绩')

# 条形图

plt.bar(classes, score)

plt.show()

# 数据

year1 = range(2005, 2020)

height2 = [168, 155, 160, 143, 170, 160, 193, 170, 190, 160, 143, 170, 160, 193, 170]

plt.title('折线图')

plt.plot(year1, height2)

plt.show()

# 数据

labels = ['房贷', '购车', '教育', '饮食']

data = [4000, 2000, 6000, 1200]

plt.title('饼图')

plt.pie(data, labels=labels, autopct='%1.1f%%') # autopct='%1.1f%%'为保留一位小数

plt.show()

# Data: one [price, sales] pair per product.
data = [[12.2, 23.4], [14.5, 11.4], [15.8, 22.9]]
product_names = ['牙膏', '洗衣粉', '衣服']

x = [item[0] for item in data]
# The second element of each pair is the y value (the original read
# item[0] again, which plotted price against itself).
y = [item[1] for item in data]

plt.title('散点图')
plt.scatter(x, y)
plt.xlabel('价格(元)')
plt.ylabel('销售(件)')

# Write each product name at the coordinates of its own point.
for (px, py), product in zip(data, product_names):
    plt.text(px, py, product)

plt.show()

# 数据

data = [88, 78, 68, 79, 90, 89, 67, 76, 98, 30, 30]

plt.title('箱线图')

plt.boxplot(data)

plt.show()

# 极径和角度数据

r = [1, 2, 3, 4, 5] # 极径

theta = [0.0, 1.57, 3.14, 4.71, 6.28]

ax = plt.subplot(111, projection='polar') # 指定坐标轴为极坐标轴

plt.plot(theta, r) # 绘制极线图

# 指定坐标轴为极坐标轴

ax1 = plt.subplot(111, projection='polar')

# 绘制极坐标轴的示例

ax1.plot([1, 2, 3, 4, 5])

ax1.scatter([0.1, 0.2, 0.3, 0.4, 0.5], [0.6, 0.4, 0.2, 0.8, 0.3])

plt.title('极线图')

plt.show() # 显示图形

# 数据

year = range(2005, 2020)

height = [168, 155, 160, 143, 170, 160, 193, 170, 190, 160, 143, 170, 160, 193, 170]

plt.title('阶梯图')

plt.step(year, height)

plt.show()

# Data: bar positions, class names and scores.
x = [1, 2, 3]
name = ['一班', '二班', '三班']
y = [70, 90, 88]

# Bar chart.
plt.bar(x, y)

# Figure configuration.
plt.title('成绩柱状图')  # title
plt.xlabel('班级')
plt.ylabel('成绩')
plt.xticks(x, name)  # label each bar with its class name

# Print every score just above its bar.
for xpos, score in zip(x, y):
    plt.text(xpos, score + 1, score)

plt.show()

# Data: scores for three subjects.
ch = [72, 80, 66, 77, 92]
math = [62, 92, 72, 75, 88]
eng = [88, 76, 45, 80, 98]

plt.title('堆积图')

# Chinese scores form the bottom layer.
plt.bar(range(1, 6), ch, color='r', label='语文成绩')

# Maths scores are stacked on top of the Chinese scores.
plt.bar(range(1, 6), math, bottom=ch, color='g', label='数学成绩')

# English scores start where Chinese + maths end.
chmath = [c + m for c, m in zip(ch, math)]
plt.bar(range(1, 6), eng, bottom=chmath, color='b', label='英语成绩')

# The label= arguments above only appear once a legend is requested.
plt.legend()

plt.show()

# 数据: 三个学科的成绩

c1 = [72, 80, 66]

c2 = [62, 92, 72]

c3 = [88, 76, 45]

name_list = ['语文', '数学', '英语']

width = 0.4 # 柱状图宽度

x = [1, 3, 5] # 柱状图之间的间隔

plt.bar(x, c1, label='c1', fc='r', width=width)

x = [1.4, 3.4, 5.4]

plt.bar(x, c2, label='c2', fc='g', width=width)

x = [1.8, 3.8, 5.8]

plt.bar(x, c3, label='c3', fc='b', width=width)

x = [1.4, 3.4, 5.4]

# 设置横坐标的名称

plt.xticks(x, name_list)

# 设置班级颜色

plt.legend()

plt.title('分块图-三班级成绩图')

plt.xlabel('科目')

plt.ylabel('成绩')

plt.show()

x = [22, 23, 24, 25, 26, 27, 28, 29, 30] # 随机生成年龄

y = [155, 150, 175, 180, 179, 190, 189, 170, 168] # 随机生成身高

z = [60, 66, 58, 76, 90, 89, 77, 88, 98] # 随机生成体重

# 绘制气泡图： s指定气泡的大小

plt.scatter(x, y, s=z)

plt.title('气泡图')

plt.show()

一. 柱形图

# Libraries

import numpy as np

import matplotlib.pyplot as plt

# Create dataset

height = [3, 12, 5, 18, 45]

bars = ('A', 'B', 'C', 'D', 'E')

x_pos = np.arange(len(bars))

# Create bars

plt.bar(x_pos, height)

# Create names on the x-axis

plt.xticks(x_pos, bars)

# Show graphic

plt.show()

二. 三色柱形图

# libraries

import numpy as np

import matplotlib.pyplot as plt

# set width of bars

barWidth = 0.25

# set heights of bars

bars1 = [12, 30, 1, 8, 22]

bars2 = [28, 6, 16, 5, 10]

bars3 = [29, 3, 24, 25, 17]

# Set position of bar on X axis

r1 = np.arange(len(bars1))

r2 = [x + barWidth for x in r1]

r3 = [x + barWidth for x in r2]

# Make the plot

plt.bar(r1, bars1, color='#7f6d5f', width=barWidth, edgecolor='white', label='var1')

plt.bar(r2, bars2, color='#557f2d', width=barWidth, edgecolor='white', label='var2')

plt.bar(r3, bars3, color='#2d7f5e', width=barWidth, edgecolor='white', label='var3')

# Add xticks on the middle of the group bars

plt.xlabel('group', fontweight='bold')

plt.xticks([r + barWidth for r in range(len(bars1))], ['A', 'B', 'C', 'D', 'E'])

# Create legend & Show graphic

plt.legend()

plt.show()

三. 叠加柱形图

# libraries

import numpy as np

import matplotlib.pyplot as plt

from matplotlib import rc

import pandas as pd

# y-axis in bold

rc('font', weight='bold')

# Values of each group

bars1 = [12, 28, 1, 8, 22]

bars2 = [28, 7, 16, 4, 10]

bars3 = [25, 3, 23, 25, 17]

# Heights of bars1 + bars2

bars = np.add(bars1, bars2).tolist()

# The position of the bars on the x-axis

r = [0,1,2,3,4]

# Names of group and bar width

names = ['A','B','C','D','E']

barWidth = 1

# Create brown bars

plt.bar(r, bars1, color='#7f6d5f', edgecolor='white', width=barWidth)

# Create green bars (middle), on top of the first ones

plt.bar(r, bars2, bottom=bars1, color='#557f2d', edgecolor='white', width=barWidth)

# Create green bars (top)

plt.bar(r, bars3, bottom=bars, color='#2d7f5e', edgecolor='white', width=barWidth)

# Custom X axis

plt.xticks(r, names, fontweight='bold')

plt.xlabel("group")

# Show graphic

plt.show()

四. 叠加百分比柱形图

# libraries

import numpy as np

import matplotlib.pyplot as plt

from matplotlib import rc

import pandas as pd

# Data

r = [0,1,2,3,4]

raw_data = {'greenBars': [20, 1.5, 7, 10, 5], 'orangeBars': [5, 15, 5, 10, 15],'blueBars': [2, 15, 18, 5, 10]}

df = pd.DataFrame(raw_data)

# From raw value to percentage

totals = [i+j+k for i,j,k in zip(df['greenBars'], df['orangeBars'], df['blueBars'])]

greenBars = [i / j * 100 for i,j in zip(df['greenBars'], totals)]

orangeBars = [i / j * 100 for i,j in zip(df['orangeBars'], totals)]

blueBars = [i / j * 100 for i,j in zip(df['blueBars'], totals)]

# plot

barWidth = 0.85

names = ('A','B','C','D','E')

# Create green Bars

plt.bar(r, greenBars, color='#b5ffb9', edgecolor='white', width=barWidth)

# Create orange Bars

plt.bar(r, orangeBars, bottom=greenBars, color='#f9bc86', edgecolor='white', width=barWidth)

# Create blue Bars

plt.bar(r, blueBars, bottom=[i+j for i,j in zip(greenBars, orangeBars)], color='#a3acff', edgecolor='white', width=barWidth)

# Custom x axis

plt.xticks(r, names)

plt.xlabel("group")

# Show graphic

plt.show()

五. 水平直方图

# libraries & dataset

import seaborn as sns

import matplotlib.pyplot as plt

# set a grey background (use sns.set_theme() if seaborn version 0.11.0 or above)

sns.set(style="darkgrid")

df = sns.load_dataset("iris")

sns.histplot(data=df, y="sepal_length")

plt.show()

六. 紫色柱形图

# libraries

import numpy as np

import matplotlib.pyplot as plt

# create dataset

height = [3, 12, 5, 18, 45]

bars = ('A', 'B', 'C', 'D', 'E')

x_pos = np.arange(len(bars))

# Create bars and choose color

plt.bar(x_pos, height, color = (0.5,0.1,0.5,0.6))

# Add title and axis names

plt.title('My title')

plt.xlabel('categories')

plt.ylabel('values')

# Create names on the x axis

plt.xticks(x_pos, bars)

# Show graph

plt.show()

七.带条纹的条形图

# Libraries

import numpy as np

import matplotlib.pyplot as plt

# Create dataset

height = [2, 5, 4, 6]

bars = ('A', 'B', 'C', 'D')

x_pos = np.arange(len(bars))

# Create bars; plt.bar returns a container that yields one rectangle per bar.
figure = plt.bar(x_pos, height)

# Define some hatches, one pattern per bar.
hatches = ['-', '/', '||', '///']

# Loop over bars and assign hatches.
for bar, hatch in zip(figure, hatches):
    bar.set_hatch(hatch)

# Create names on the x-axis

plt.xticks(x_pos, bars)

# Show graphic

plt.show()

八、带标签的不同色条形图

# libraries

import numpy as np

import matplotlib.pyplot as plt

# width of the bars

barWidth = 0.3

# Choose the height of the blue bars

bars1 = [10, 9, 2]

# Choose the height of the cyan bars

bars2 = [10.8, 9.5, 4.5]

# Choose the height of the error bars (bars1)

yer1 = [0.5, 0.4, 0.5]

# Choose the height of the error bars (bars2)

yer2 = [1, 0.7, 1]

# The x position of bars

r1 = np.arange(len(bars1))

r2 = [x + barWidth for x in r1]

# Create blue bars

plt.bar(r1, bars1, width = barWidth, color = 'blue', edgecolor = 'black', yerr=yer1, capsize=7, label='poacee')

# Create cyan bars

plt.bar(r2, bars2, width = barWidth, color = 'cyan', edgecolor = 'black', yerr=yer2, capsize=7, label='sorgho')

# general layout

plt.xticks([r + barWidth for r in range(len(bars1))], ['cond_A', 'cond_B', 'cond_C'])

plt.ylabel('height')

plt.legend()

# Show graphic

plt.show()

九、详细的条形图

# library

import matplotlib.pyplot as plt

# Create bars

barWidth = 0.9

bars1 = [3, 3, 1]

bars2 = [4, 2, 3]

bars3 = [4, 6, 7, 10, 4, 4]

bars4 = bars1 + bars2 + bars3

# The X position of bars

r1 = [1,5,9]

r2 = [2,6,10]

r3 = [3,4,7,8,11,12]

r4 = r1 + r2 + r3

# Create barplot

plt.bar(r1, bars1, width = barWidth, color = (0.3,0.1,0.4,0.6), label='Alone')

plt.bar(r2, bars2, width = barWidth, color = (0.3,0.5,0.4,0.6), label='With Himself')

plt.bar(r3, bars3, width = barWidth, color = (0.3,0.9,0.4,0.6), label='With other genotype')

# Note: the barplot could be created easily. See the barplot section for other examples.

# Create legend

plt.legend()

# Text below each barplot with a rotation at 90°

# Create labels, one per bar, in the same order as r4 / bars4.
label = ['n = 6', 'n = 25', 'n = 13', 'n = 36', 'n = 30', 'n = 11', 'n = 16', 'n = 37', 'n = 14', 'n = 4', 'n = 31', 'n = 34']

# Text on the top of each bar: shifted half a unit left of the bar position
# and slightly above the bar's top edge.
for xpos, bar_top, text in zip(r4, bars4, label):
    plt.text(x=xpos - 0.5, y=bar_top + 0.1, s=text, size=6)

# Adjust the margins

plt.subplots_adjust(bottom= 0.2, top = 0.98)

# Show graphic

plt.show()

十、色块

# library

import seaborn as sns

import pandas as pd

import numpy as np

# Create a dataset

df = pd.DataFrame(np.random.random((5,5)), columns=["a","b","c","d","e"])

# Default heatmap: just a visualization of this square matrix
sns.heatmap(df)

# Display the figure; outside a notebook nothing appears without this call.
import matplotlib.pyplot as plt
plt.show()

十一、带数值的色块

# libraries

import seaborn as sns

import pandas as pd

import numpy as np

# Create a dataset

df = pd.DataFrame(np.random.random((10,10)), columns=["a","b","c","d","e","f","g","h","i","j"])

# plot a heatmap with annotation (cell values printed in a small font)
sns.heatmap(df, annot=True, annot_kws={"size": 7})

# Display the figure; outside a notebook nothing appears without this call.
import matplotlib.pyplot as plt
plt.show()

十二、散点图

# libraries

import pandas as pd
import numpy as np
# pyplot is the supported plotting interface; matplotlib.pylab is a legacy
# namespace whose use is discouraged.
import matplotlib.pyplot as plt
import seaborn as sns

# Create data frame with randomly selected x and y positions
df = pd.DataFrame(np.random.random((100, 2)), columns=["x", "y"])

# Add a column: the color depends on x and y values, but you can use any function you want
value = (df['x'] > 0.2) & (df['y'] > 0.4)
# `value` is already a boolean Series, so it is used as the mask directly.
df['color'] = np.where(value, "#9b59b6", "#3498db")

# plot: scatter only (no regression line), one face colour per point
sns.regplot(data=df, x="x", y="y", fit_reg=False, scatter_kws={'facecolors': df['color']})

plt.show()

十三、颜色渐变的散点图

# Libraries

import seaborn as sns

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

# create data

x = np.random.rand(80) - 0.5

y = x+np.random.rand(80)

z = x+np.random.rand(80)

df = pd.DataFrame({'x':x, 'y':y, 'z':z})

# Plot with palette

sns.lmplot( x='x', y='y', data=df, fit_reg=False, hue='x', legend=False, palette="Blues")

plt.show()

# reverse palette

sns.lmplot( x='x', y='y', data=df, fit_reg=False, hue='x', legend=False, palette="Blues_r")

plt.show()

十四、箱线图（seaborn 主题对比）

# libraries

import seaborn as sns

import numpy as np

import matplotlib.pyplot as plt

# Data

data = np.random.normal(size=(20, 6)) + np.arange(6) / 2

# Proposed themes: darkgrid, whitegrid, dark, white, and ticks

sns.set_style("whitegrid")

sns.boxplot(data=data)

plt.title("whitegrid")

plt.show()

sns.set_style("darkgrid")

sns.boxplot(data=data);

plt.title("darkgrid")

plt.show()

sns.set_style("white")

sns.boxplot(data=data);

plt.title("white")

plt.show()

sns.set_style("dark")

sns.boxplot(data=data);

plt.title("dark")

plt.show()

sns.set_style("ticks")

sns.boxplot(data=data);

plt.title("ticks")

plt.show()

十五、带点折线图

# library and dataset

from matplotlib import pyplot as plt

import pandas as pd

import numpy as np

# Create data

df=pd.DataFrame({'x_axis': range(1,101), 'y_axis': np.random.randn(100)*15+range(1,101), 'z': (np.random.randn(100)*15+range(1,101))*2 })

# plot with matplotlib

plt.plot( 'x_axis', 'y_axis', data=df, marker='o', color='mediumvioletred')

plt.show()

十六、对比折线图

# Libraries and data

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

df=pd.DataFrame({'x_values': range(1,11), 'y_values': np.random.randn(10) })

# Draw plot

plt.plot( 'x_values', 'y_values', data=df, color='skyblue')

plt.show()

# Draw line chart by modifiying transparency of the line

plt.plot( 'x_values', 'y_values', data=df, color='skyblue', alpha=0.3)

# Show plot

plt.show()

十七、虚线折线图

# Libraries and data

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

df=pd.DataFrame({'x_values': range(1,11), 'y_values': np.random.randn(10) })

# Draw line chart with dashed line

plt.plot( 'x_values', 'y_values', data=df, linestyle='dashed')

# Show graph

plt.show()

十八、多种颜色的折线图

# libraries

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

# Make a data frame: a shared 'x' column plus nine noisy series.
# (The snippet originally had no data here, so the loop below failed on the
# missing 'x' column.)
df = pd.DataFrame({
    'x': range(1, 11),
    'y1': np.random.randn(10),
    'y2': np.random.randn(10) + range(1, 11),
    'y3': np.random.randn(10) + range(11, 21),
    'y4': np.random.randn(10) + range(6, 16),
    'y5': np.random.randn(10) + range(4, 14),
    'y6': np.random.randn(10) + range(2, 12),
    'y7': np.random.randn(10) + range(5, 15),
    'y8': np.random.randn(10) + range(4, 14),
    'y9': np.random.randn(10) + range(4, 14),
})

# Change the style of plot. Newer Matplotlib releases renamed the bundled
# seaborn styles to 'seaborn-v0_8-*'; use whichever name is available.
style_name = 'seaborn-v0_8-darkgrid'
if style_name not in plt.style.available:
    style_name = 'seaborn-darkgrid'
plt.style.use(style_name)

# Create a color palette
palette = plt.get_cmap('Set1')

# Plot multiple lines, one per data column, each with its own palette color
for num, column in enumerate(df.drop('x', axis=1), start=1):
    plt.plot(df['x'], df[column], marker='', color=palette(num), linewidth=1, alpha=0.9, label=column)

# Add legend
plt.legend(loc=2, ncol=2)

# Add titles
plt.title("A (bad) Spaghetti plot", loc='left', fontsize=12, fontweight=0, color='orange')
plt.xlabel("Time")
plt.ylabel("Score")

# Show the graph
plt.show()

十九、几种线

plt.plot( [1,1.1,1,1.1,1], linestyle='-' , linewidth=4)

plt.text(1.5, 1.3, "linestyle = '-' ", horizontalalignment='left', size='medium', color='C0', weight='semibold')

plt.plot( [2,2.1,2,2.1,2], linestyle='--' , linewidth=4 )

plt.text(1.5, 2.3, "linestyle = '--' ", horizontalalignment='left', size='medium', color='C1', weight='semibold')

plt.plot( [3,3.1,3,3.1,3], linestyle='-.' , linewidth=4 )

plt.text(1.5, 3.3, "linestyle = '-.' ", horizontalalignment='left', size='medium', color='C2', weight='semibold')

plt.plot( [4,4.1,4,4.1,4], linestyle=':' , linewidth=4 )

plt.text(1.5, 4.3, "linestyle = ':' ", horizontalalignment='left', size='medium', color='C3', weight='semibold')

plt.axis('off')

plt.show()

二十、几种折线

# libraries

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

# Data

df=pd.DataFrame({'x_values': range(1,11), 'y1_values': np.random.randn(10), 'y2_values': np.random.randn(10)+range(1,11), 'y3_values': np.random.randn(10)+range(11,21) })

# multiple line plots

plt.plot( 'x_values', 'y1_values', data=df, marker='o', markerfacecolor='blue', markersize=12, color='skyblue', linewidth=4)

plt.plot( 'x_values', 'y2_values', data=df, marker='', color='olive', linewidth=2)

plt.plot( 'x_values', 'y3_values', data=df, marker='', color='olive', linewidth=2, linestyle='dashed', label="toto")

# show legend

plt.legend()

# show graph

plt.show()

二十一、小倍数线图

# libraries

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

# Make a data frame: a shared 'x' column plus nine noisy series, one per
# panel of the 3x3 grid. (The snippet originally had no data here, so the
# loop below failed on the missing 'x' column.)
df = pd.DataFrame({
    'x': range(1, 11),
    'y1': np.random.randn(10),
    'y2': np.random.randn(10) + range(1, 11),
    'y3': np.random.randn(10) + range(11, 21),
    'y4': np.random.randn(10) + range(6, 16),
    'y5': np.random.randn(10) + range(4, 14),
    'y6': np.random.randn(10) + range(2, 12),
    'y7': np.random.randn(10) + range(5, 15),
    'y8': np.random.randn(10) + range(4, 14),
    'y9': np.random.randn(10) + range(4, 14),
})

# Initialize the figure style. Newer Matplotlib releases renamed the bundled
# seaborn styles to 'seaborn-v0_8-*'; use whichever name is available.
style_name = 'seaborn-v0_8-darkgrid'
if style_name not in plt.style.available:
    style_name = 'seaborn-darkgrid'
plt.style.use(style_name)

# create a color palette
palette = plt.get_cmap('Set1')

# multiple line plot: one subplot per data column
for num, column in enumerate(df.drop('x', axis=1), start=1):
    # Find the right spot on the plot
    plt.subplot(3, 3, num)

    # Plot the lineplot
    plt.plot(df['x'], df[column], marker='', color=palette(num), linewidth=1.9, alpha=0.9, label=column)

    # Same limits for every chart
    plt.xlim(0, 10)
    plt.ylim(-2, 22)

    # Not ticks everywhere: keep x labels only on the bottom row and y labels
    # only on the left column. tick_params needs real booleans here.
    if num <= 6:
        plt.tick_params(labelbottom=False)
    if num not in [1, 4, 7]:
        plt.tick_params(labelleft=False)

    # Add title
    plt.title(column, loc='left', fontsize=12, fontweight=0, color=palette(num))

# general title
plt.suptitle("How the 9 students improved\nthese past few days?", fontsize=13, fontweight=0, color='black', style='italic', y=1.02)

# Axis titles, placed in figure coordinates so they label the whole grid
# (plt.text would position them in the data coordinates of the last panel).
plt.gcf().text(0.5, 0.02, 'Time', ha='center', va='center')
plt.gcf().text(0.06, 0.5, 'Note', ha='center', va='center', rotation='vertical')

# Show the graph
plt.show()

二十二、基本连通散点图

# Libraries

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

# Set figure default figure size

plt.rcParams["figure.figsize"] = (10, 6)

# Create a random number generator for reproducibility

rng = np.random.default_rng(1111)

# Get some random points!

x = np.array(range(10))

y = rng.integers(10, 100, 10)

z = y + rng.integers(5, 20, 10)

plt.plot(x, z, linestyle="-", marker="o", label="Income")

plt.plot(x, y, linestyle="-", marker="o", label="Expenses")

plt.legend()

plt.show()

二十三、如何用 Python 避免过度绘制

# libraries

import matplotlib.pyplot as plt

import numpy as np

import seaborn as sns

import pandas as pd

# Dataset: three Gaussian clusters of 20000 points each, tagged A, B and C.
df = pd.DataFrame({'x': np.random.normal(10, 1.2, 20000), 'y': np.random.normal(10, 1.2, 20000), 'group': np.repeat('A', 20000)})
tmp1 = pd.DataFrame({'x': np.random.normal(14.5, 1.2, 20000), 'y': np.random.normal(14.5, 1.2, 20000), 'group': np.repeat('B', 20000)})
tmp2 = pd.DataFrame({'x': np.random.normal(9.5, 1.5, 20000), 'y': np.random.normal(15.5, 1.5, 20000), 'group': np.repeat('C', 20000)})

# DataFrame.append was removed in pandas 2.0. pd.concat stacks the frames
# row-wise in the same way and, like append, keeps each frame's row labels.
df = pd.concat([df, tmp1, tmp2])

# plot

plt.plot( 'x', 'y', "", data=df, linestyle='', marker='o')

plt.xlabel('Value of X')

plt.ylabel('Value of Y')

plt.title('Overplotting looks like that:', loc='left')

plt.show()

二十四、基本甜甜圈

# library

import matplotlib.pyplot as plt

# create data

size_of_groups=[12,11,3,30]

# Create a pie plot

plt.pie(size_of_groups)

#plt.show()

# add a white circle at the center

my_circle=plt.Circle( (0,0), 0.7, color='white')

p=plt.gcf()

p.gca().add_artist(my_circle)

# show the graph

plt.show()

二十五、自定义甜甜圈

# library

import matplotlib.pyplot as plt

# create data

names = ['groupA', 'groupB', 'groupC', 'groupD']

size = [12,11,3,30]

# Create a circle at the center of the plot

my_circle = plt.Circle( (0,0), 0.7, color='white')

# Give color names

plt.pie(size, labels=names, colors=['red','green','blue','skyblue'])

p = plt.gcf()

p.gca().add_artist(my_circle)

# Show the graph

plt.show()

# library

import matplotlib.pyplot as plt

# create data

names = ['groupA', 'groupB', 'groupC', 'groupD']

size = [12,11,3,30]

# Create a circle at the center of the plot

my_circle = plt.Circle( (0,0), 0.7, color='white')

# Not enough colors --> colors will cycle

plt.pie(size, labels=names, colors=['red','green'])

p = plt.gcf()

p.gca().add_artist(my_circle)

# Show the graph

plt.show()

二十六、改变背景的甜甜圈

# library

import matplotlib.pyplot as plt

# Data

names = 'groupA', 'groupB', 'groupC', 'groupD',

size = [12,11,3,30]

# create a figure and set different background
fig = plt.figure()
fig.patch.set_facecolor('black')

# Create a circle at the center of the plot (black, to match the background)
my_circle = plt.Circle((0, 0), 0.7, color='black')

# Pieplot + circle on it. The labels are made white through textprops so the
# colour applies to this chart only; setting rcParams['text.color'] would
# also turn the text of every later figure white.
plt.pie(size, labels=names, textprops={'color': 'white'})
p = plt.gcf()
p.gca().add_artist(my_circle)

plt.show()

二十七、分组甜甜圈

# Libraries

import matplotlib.pyplot as plt

# Make data: I have 3 groups and 7 subgroups

group_names=['groupA', 'groupB', 'groupC']

group_size=[12,11,30]

subgroup_names=['A.1', 'A.2', 'A.3', 'B.1', 'B.2', 'C.1', 'C.2', 'C.3', 'C.4', 'C.5']

subgroup_size=[4,3,5,6,5,10,5,5,4,6]

# Create colors

a, b, c=[plt.cm.Blues, plt.cm.Reds, plt.cm.Greens]

# First Ring (outside)

fig, ax = plt.subplots()

ax.axis('equal')

mypie, _ = ax.pie(group_size, radius=1.3, labels=group_names, colors=[a(0.6), b(0.6), c(0.6)] )

plt.setp( mypie, width=0.3, edgecolor='white')

# Second Ring (Inside)

mypie2, _ = ax.pie(subgroup_size, radius=1.3-0.3, labels=subgroup_names, labeldistance=0.7, colors=[a(0.5), a(0.4), a(0.3), b(0.5), b(0.4), c(0.6), c(0.5), c(0.4), c(0.3), c(0.2)])

plt.setp( mypie2, width=0.4, edgecolor='white')

plt.margins(0,0)

# show it

plt.show()

二十八、棒棒糖图

# libraries

import matplotlib.pyplot as plt

import numpy as np

# create data

x=range(1,41)

values=np.random.uniform(size=40)

# stem function

plt.stem(x, values)

plt.ylim(0, 1.2)

plt.show()

# stem function: If x is not provided, a sequence of numbers is created by python:

plt.stem(values)

plt.show()

二十九、自定义棒棒糖图

# libraries

import matplotlib.pyplot as plt

import numpy as np

# create data

values=np.random.uniform(size=40)

# plot without markers

plt.stem(values, markerfmt=' ')

plt.show()

# change color and shape and size and edges

(markers, stemlines, baseline) = plt.stem(values)

plt.setp(markers, marker='D', markersize=10, markeredgecolor="orange", markeredgewidth=2)

plt.show()

三十、垂直棒棒糖图

# libraries

import pandas as pd

import matplotlib.pyplot as plt

import numpy as np

# Create a dataframe

df = pd.DataFrame({'group':list(map(chr, range(65, 85))), 'values':np.random.uniform(size=20) })

# Reorder it based on the values

ordered_df = df.sort_values(by='values')

my_range=range(1,len(df.index)+1)

# The horizontal plot is made using the hline function

plt.hlines(y=my_range, xmin=0, xmax=ordered_df['values'], color='skyblue')

plt.plot(ordered_df['values'], my_range, "o")

# Add titles and axis names

plt.yticks(my_range, ordered_df['group'])

plt.title("A vertical lolipop plot", loc='left')

plt.xlabel('Value of the variable')

plt.ylabel('Group')

# Show the plot

plt.show()

三十一、高光棒棒糖图

# libraries

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

# Create a dataframe

df = pd.DataFrame({'group':list(map(chr, range(65, 85))), 'values':np.random.uniform(size=20) })

# Reorder it based on values:

ordered_df = df.sort_values(by='values')

my_range=range(1,len(df.index)+1)

# Create a color if the group is "B"

my_color=np.where(ordered_df ['group']=='B', 'orange', 'skyblue')

my_size=np.where(ordered_df ['group']=='B', 70, 30)

# The horizontal plot is made using the hline() function

plt.hlines(y=my_range, xmin=0, xmax=ordered_df['values'], color=my_color, alpha=0.4)

plt.scatter(ordered_df['values'], my_range, color=my_color, s=my_size, alpha=1)

# Add title and axis names

plt.yticks(my_range, ordered_df['group'])

plt.title("What about the B group?", loc='left')

plt.xlabel('Value of the variable')

plt.ylabel('Group')

# show the graph

plt.show()

三十二、分组棒棒糖图

# libraries

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

# Create a dataframe

value1=np.random.uniform(size=20)

value2=value1+np.random.uniform(size=20)/4

df = pd.DataFrame({'group':list(map(chr, range(65, 85))), 'value1':value1 , 'value2':value2 })

# Reorder it following the values of the first value:

ordered_df = df.sort_values(by='value1')

my_range=range(1,len(df.index)+1)

# The horizontal plot is made using the hline function

plt.hlines(y=my_range, xmin=ordered_df['value1'], xmax=ordered_df['value2'], color='grey', alpha=0.4)

plt.scatter(ordered_df['value1'], my_range, color='skyblue', alpha=1, label='value1')

plt.scatter(ordered_df['value2'], my_range, color='green', alpha=0.4 , label='value2')

plt.legend()

# Add title and axis names

plt.yticks(my_range, ordered_df['group'])

plt.title("Comparison of the value 1 and the value 2", loc='left')

plt.xlabel('Value of the variables')

plt.ylabel('Group')

# Show the graph

plt.show()

三十三、带条件色彩的棒棒糖图

#%%

# libraries

import matplotlib.pyplot as plt

import numpy as np

import seaborn as sns

# Data

x = np.linspace(0, 2*np.pi, 100)

y = np.sin(x) + np.random.uniform(size=len(x)) - 0.2

# Create a color if the y axis value is equal or greater than 0

my_color = np.where(y>=0, 'orange', 'skyblue')

# The vertical plot is made using the vline function

plt.vlines(x=x, ymin=0, ymax=y, color=my_color, alpha=0.4)

plt.scatter(x, y, color=my_color, s=1, alpha=1)

# Add title and axis names

plt.title("Evolution of the value of ...", loc='left')

plt.xlabel('Value of the variable')

plt.ylabel('Group')

# Show the graph

plt.show()

三十四、面积图

import numpy as np

import matplotlib.pyplot as plt

# Create data

x=range(1,6)

y=[1,4,6,8,4]

# Area plot

plt.fill_between(x, y)

# Show the graph

plt.show()

# Note that we could also use the stackplot function

# but fill_between is more convenient for future customization.

#plt.stackplot(x,y)

#plt.show()

三十五、改善区域图

# libraries

import numpy as np

import matplotlib.pyplot as plt

# create data

x=range(1,15)

y=[1,4,6,8,4,5,3,2,4,1,5,6,8,7]

# Change the color and its transparency

plt.fill_between( x, y, color="skyblue", alpha=0.4)

# Show the graph

plt.show()

# Same, but add a stronger line on top (edge)

plt.fill_between( x, y, color="skyblue", alpha=0.2)

plt.plot(x, y, color="Slateblue", alpha=0.6)

# See the line plot function to learn how to customize the plt.plot function

# Show the graph

plt.show()

三十六、区域图表和分面

# libraries

import numpy as np

import seaborn as sns

import pandas as pd

import matplotlib.pyplot as plt

# Create a dataset: 16 countries x 10 years, one random value per (country, year)
my_count = [
    "France", "Australia", "Japan", "USA", "Germany", "Congo", "China", "England",
    "Spain", "Greece", "Marocco", "South Africa", "Indonesia", "Peru", "Chili", "Brazil",
]
df = pd.DataFrame({
    "country": np.repeat(my_count, 10),
    "years": list(range(2000, 2010)) * 16,
    "value": np.random.rand(160),
})

# Create a grid: one facet per country, wrapping to a new row every 4 facets
g = sns.FacetGrid(df, col='country', hue='country', col_wrap=4)

# Add the line over the area with the plot function
g.map(plt.plot, 'years', 'value')

# Fill the area under the line with fill_between
g.map(plt.fill_between, 'years', 'value', alpha=0.2)

# Control the title of each facet (just the country name)
g.set_titles("{col_name}")

# Add a title for the whole plot; leave room for it above the facets.
# The Text object returned by suptitle is deliberately NOT assigned to `g`,
# so `g` keeps referring to the FacetGrid.
plt.subplots_adjust(top=0.92)
g.fig.suptitle('Evolution of the value of stuff in 16 countries')

# Show the graph
plt.show()

三十七、白色网格区域图

# libraries

import numpy as np

import seaborn as sns

import matplotlib.pyplot as plt

# set the seaborn style

sns.set_style("whitegrid")

# Color palette

blue, = sns.color_palette("muted", 1)

# Create data

x = np.arange(23)

y = np.random.randint(8, 20, 23)

# Make the plot

fig, ax = plt.subplots()

ax.plot(x, y, color=blue, lw=3)

ax.fill_between(x, 0, y, alpha=.3)

ax.set(xlim=(0, len(x) - 1), ylim=(0, None), xticks=x)

# Show the graph

plt.show()

三十八、基本堆叠区域图

# libraries

import numpy as np

import matplotlib.pyplot as plt

# --- FORMAT 1

# Your x and y axis

x=range(1,6)

y=[ [1,4,6,8,9], [2,2,7,10,12], [2,8,5,10,6] ]

# Basic stacked area chart.

plt.stackplot(x,y, labels=['A','B','C'])

plt.legend(loc='upper left')

plt.show()

# --- FORMAT 2

x=range(1,6)

y1=[1,4,6,8,9]

y2=[2,2,7,10,12]

y3=[2,8,5,10,6]

# Basic stacked area chart.

plt.stackplot(x,y1, y2, y3, labels=['A','B','C'])

plt.legend(loc='upper left')

plt.show()

三十九、海洋风格的堆叠区域图

# libraries

import numpy as np

import matplotlib.pyplot as plt

import seaborn as sns

# set seaborn style

sns.set_theme()

# Data

x=range(1,6)

y=[ [1,4,6,8,9], [2,2,7,10,12], [2,8,5,10,6] ]

# Plot

plt.stackplot(x,y, labels=['A','B','C'])

plt.legend(loc='upper left')

plt.show()

四十、基线选项堆叠面积图

# libraries

import numpy as np

import matplotlib.pyplot as plt

import seaborn as sns

# Create data

X = np.arange(0, 10, 1)

Y = X + 5 * np.random.random((5, X.size))

# There are 4 types of baseline we can use:

baseline = ["zero", "sym", "wiggle", "weighted_wiggle"]

# Let's make 4 plots, 1 for each baseline

# Draw one stacked area chart per baseline option on a 2x2 grid
for n, v in enumerate(baseline):
    # Create the subplot first so the tick settings below apply to this axes
    # (calling tick_params before any axes exists would create a stray one).
    plt.subplot(2, 2, n + 1)
    plt.stackplot(X, *Y, baseline=v)
    plt.title(v)
    # Hide the x tick labels on the top row (the first two subplots).
    # A boolean is required here: the string 'off' is truthy and hides nothing.
    if n < 2:
        plt.tick_params(labelbottom=False)

# Adjust the spacing once, after all subplots exist, then display the figure
plt.tight_layout()
plt.show()

01. 小提琴图

小提琴图可以将一组或多组数据的数值变量分布可视化。

与有时会隐藏数据特征的箱形图相比，小提琴图值得更多关注。

import seaborn as sns

import matplotlib.pyplot as plt

# 加载数据

df = sns.load_dataset('iris', data_home='seaborn-data', cache=True)

# 绘图显示

sns.violinplot(x=df["species"], y=df["sepal_length"])

plt.show()

02. 核密度估计图

核密度估计图其实是对直方图的一个自然拓展。

可以可视化一个或多个组的数值变量的分布，非常适合大型数据集。

import seaborn as sns

import matplotlib.pyplot as plt

# 加载数据

df = sns.load_dataset('iris', data_home='seaborn-data', cache=True)

# 绘图显示

sns.kdeplot(df['sepal_width'])

plt.show()

03. 直方图

直方图，可视化一组或多组数据的分布情况。

import seaborn as sns

import matplotlib.pyplot as plt

# 加载数据

df = sns.load_dataset('iris', data_home='seaborn-data', cache=True)

# 绘图显示

# Plain histogram of sepal length (counts only, no KDE curve, no rug).
# histplot replaces the deprecated distplot(hist=True, kde=False, rug=False).
sns.histplot(x=df["sepal_length"], kde=False)

plt.show()

04. 箱形图

箱形图，可视化一组或多组数据的分布情况。

可以快速获得中位数、四分位数和异常值，但也隐藏数据集的各个数据点。

import seaborn as sns

import matplotlib.pyplot as plt

# 加载数据

df = sns.load_dataset('iris', data_home='seaborn-data', cache=True)

# 绘图显示

sns.boxplot(x=df["species"], y=df["sepal_length"])

plt.show()

06. 散点图

散点图，显示2个数值变量之间的关系。

import seaborn as sns

import matplotlib.pyplot as plt

# 加载数据

df = sns.load_dataset('iris', data_home='seaborn-data', cache=True)

# 绘图显示

sns.regplot(x=df["sepal_length"], y=df["sepal_width"])

plt.show()

08. 相关性图

相关性图或相关矩阵图，分析每对数据变量之间的关系。

相关性可视化为散点图，对角线用直方图或密度图表示每个变量的分布。

import seaborn as sns

import matplotlib.pyplot as plt

# 加载数据

df = sns.load_dataset('iris', data_home='seaborn-data', cache=True)

# 绘图显示

sns.pairplot(df)

plt.show()

10. 连接散点图

连接散点图就是一个线图，其中每个数据点由圆形或任何类型的标记展示。

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

# 创建数据

df = pd.DataFrame({'x_axis': range(1, 10), 'y_axis': np.random.randn(9) * 80 + range(1, 10)})

# 绘制显示

plt.plot('x_axis', 'y_axis', data=df, linestyle='-', marker='o')

plt.show()

11. 二维密度图

二维密度图或二维直方图，可视化两个定量变量的组合分布。

它们总是在X轴上表示一个变量，另一个在Y轴上，就像散点图。

然后计算二维空间特定区域内的次数，并用颜色渐变表示。

形状变化：六边形a hexbin chart，正方形a 2d histogram，核密度2d density plots或contour plots。

import numpy as np

import matplotlib.pyplot as plt

# Import the public class directly; the scipy.stats.kde submodule is deprecated
from scipy.stats import gaussian_kde

# Create data: 200 points drawn from a correlated 2-D normal distribution
data = np.random.multivariate_normal([0, 0], [[1, 0.5], [0.5, 3]], 200)
x, y = data.T

# Create the canvas with 6 side-by-side subplots
fig, axes = plt.subplots(ncols=6, nrows=1, figsize=(21, 5))

# Subplot 1: plain scatter plot
axes[0].set_title('Scatterplot')
axes[0].plot(x, y, 'ko')

# Subplot 2: hexagonal binning
nbins = 20
axes[1].set_title('Hexbin')
axes[1].hexbin(x, y, gridsize=nbins, cmap=plt.cm.BuGn_r)

# Subplot 3: 2D histogram
axes[2].set_title('2D Histogram')
axes[2].hist2d(x, y, bins=nbins, cmap=plt.cm.BuGn_r)

# Fit a Gaussian KDE and evaluate it on a regular nbins x nbins grid
# spanning the data extent (the complex step makes mgrid include both ends)
k = gaussian_kde(data.T)
xi, yi = np.mgrid[x.min():x.max():nbins * 1j, y.min():y.max():nbins * 1j]
zi = k(np.vstack([xi.flatten(), yi.flatten()]))

# Reshape the flat density values back to the grid shape once, for all plots
density = zi.reshape(xi.shape)

# Subplot 4: density map
axes[3].set_title('Calculate Gaussian KDE')
axes[3].pcolormesh(xi, yi, density, shading='auto', cmap=plt.cm.BuGn_r)

# Subplot 5: density map with Gouraud shading
axes[4].set_title('2D Density with shading')
axes[4].pcolormesh(xi, yi, density, shading='gouraud', cmap=plt.cm.BuGn_r)

# Subplot 6: shaded density map with contour lines on top
axes[5].set_title('Contour')
axes[5].pcolormesh(xi, yi, density, shading='gouraud', cmap=plt.cm.BuGn_r)
axes[5].contour(xi, yi, density)

plt.show()

12. 条形图

条形图表示多个明确的变量的数值关系。每个变量都为一个条形。条形的大小代表其数值。

import numpy as np

import matplotlib.pyplot as plt

# 生成随机数据

height = [3, 12, 5, 18, 45]

bars = ('A', 'B', 'C', 'D', 'E')

y_pos = np.arange(len(bars))

# 创建条形图

plt.bar(y_pos, height)

# x轴标签

plt.xticks(y_pos, bars)

# 显示

plt.show()

13. 雷达图

雷达图，可以可视化多个定量变量的一个或多个系列的值。

每个变量都有自己的轴，所有轴都连接在图形的中心。

import matplotlib.pyplot as plt

import pandas as pd

from math import pi

# Build the data: one row per group, one column per variable
df = pd.DataFrame({
    'group': ['A', 'B', 'C', 'D'],
    'var1': [38, 1.5, 30, 4],
    'var2': [29, 10, 9, 34],
    'var3': [8, 39, 23, 24],
    'var4': [7, 31, 33, 14],
    'var5': [28, 15, 32, 14],
})

# Variable names (every column except 'group') and their count
categories = list(df)[1:]
N = len(categories)

# One evenly spaced angle per variable; the first angle is repeated at the
# end so that each plotted polygon closes on itself
angles = [n / float(N) * 2 * pi for n in range(N)]
angles += angles[:1]

# Initialise a polar axes
ax = plt.subplot(111, polar=True)

# Put the first axis at the top and go clockwise
ax.set_theta_offset(pi / 2)
ax.set_theta_direction(-1)

# Background: one labelled spoke per variable, radial ticks at 10/20/30
plt.xticks(angles[:-1], categories)
ax.set_rlabel_position(0)
plt.yticks([10, 20, 30], ["10", "20", "30"], color="grey", size=7)
plt.ylim(0, 40)

# Draw the first two groups: (row index, legend label, fill colour)
for row, series_label, fill_color in ((0, "group A", 'b'), (1, "group B", 'r')):
    values = df.loc[row].drop('group').values.flatten().tolist()
    # Repeat the first value to close the polygon, matching `angles`
    values += values[:1]
    ax.plot(angles, values, linewidth=1, linestyle='solid', label=series_label)
    ax.fill(angles, values, fill_color, alpha=0.1)

# Add the legend
plt.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))

# Show the graph
plt.show()

14. 词云图

词云图是文本数据的视觉表示。

单词通常是单个的，每个单词的重要性以字体大小或颜色表示。

from wordcloud import WordCloud

import matplotlib.pyplot as plt

# 添加词语

text=("Python Python Python Matplotlib Chart Wordcloud Boxplot")

# 创建词云对象

wordcloud = WordCloud(width=480, height=480, margin=0).generate(text)

# 显示词云图

plt.imshow(wordcloud, interpolation='bilinear')

plt.axis("off")

plt.margins(x=0, y=0)

plt.show()

15. 平行坐标图

一个平行坐标图，能够比较不同系列相同属性的数值情况。

Pandas可能是绘制平行坐标图的最佳方式。

import seaborn as sns

import matplotlib.pyplot as plt

from pandas.plotting import parallel_coordinates

# 读取数据

data = sns.load_dataset('iris', data_home='seaborn-data', cache=True)

# 创建图表

parallel_coordinates(data, 'species', colormap=plt.get_cmap("Set2"))

# 显示

plt.show()

17. 径向柱图

径向柱图同样也是条形图的变形，但是使用极坐标而不是直角坐标系。

绘制起来有点麻烦，而且比柱状图准确度低，但更引人注目。

import pandas as pd

import matplotlib.pyplot as plt

import numpy as np

# Generate data: 50 named items with random integer values in [10, 100)
df = pd.DataFrame(
    {
        'Name': ['item ' + str(i) for i in range(1, 51)],
        'Value': np.random.randint(low=10, high=100, size=50),
    }
)

# Sort by value so the bars grow steadily around the circle
df = df.sort_values(by=['Value'])

# Initialise the canvas with a polar axes and hide the axis decorations
plt.figure(figsize=(20, 10))
ax = plt.subplot(111, polar=True)
plt.axis('off')

# Chart parameters
upperLimit = 100
lowerLimit = 30   # radius of the empty inner circle; bars start here
labelPadding = 4  # gap between the end of a bar and its label

# Largest value in the data (not named `max`, to avoid shadowing the builtin)
max_value = df['Value'].max()

# Turn each value into a bar height: scale by `slope`, then offset by lowerLimit
slope = (max_value - lowerLimit) / max_value
heights = slope * df.Value + lowerLimit

# Angular width of one bar: the full circle split evenly between the rows
width = 2 * np.pi / len(df.index)

# Angle at which each bar is centred
indexes = list(range(1, len(df.index) + 1))
angles = [element * width for element in indexes]

# Draw the bars
bars = ax.bar(
    x=angles,
    height=heights,
    width=width,
    bottom=lowerLimit,
    linewidth=2,
    edgecolor="white",
    color="#61a4b2",
)

# Add one label per bar
for bar, angle, height, label in zip(bars, angles, heights, df["Name"]):
    # Rotate the label so it follows the direction of its bar
    rotation = np.rad2deg(angle)

    # On the left half of the circle, flip the text by 180 degrees and
    # right-align it so that it is not drawn upside down
    if np.pi / 2 <= angle < 3 * np.pi / 2:
        alignment = "right"
        rotation = rotation + 180
    else:
        alignment = "left"

    # Place the label just beyond the outer end of the bar
    ax.text(
        x=angle,
        y=lowerLimit + bar.get_height() + labelPadding,
        s=label,
        ha=alignment,
        va='center',
        rotation=rotation,
        rotation_mode="anchor",
    )

plt.show()

19. 维恩图

维恩图，显示不同组之间所有可能的关系。

import matplotlib.pyplot as plt

from matplotlib_venn import venn2

# 创建图表

venn2(subsets=(10, 5, 2), set_labels=('Group A', 'Group B'))

# 显示

plt.show()

20. 圆环图

圆环图，本质上就是一个饼图，中间切掉了一个区域。

import matplotlib.pyplot as plt

# 创建数据

size_of_groups = [12, 11, 3, 30]

# 生成饼图

plt.pie(size_of_groups)

# 在中心添加一个圆, 生成环形图

my_circle = plt.Circle((0, 0), 0.7, color='white')

p = plt.gcf()

p.gca().add_artist(my_circle)

plt.show()

21. 饼图

饼图，最常见的可视化图表之一。

将圆划分成一个个扇形区域，每个区域代表在整体中所占的比例。

import matplotlib.pyplot as plt

# 创建数据

size_of_groups = [12, 11, 3, 30]

# 生成饼图

plt.pie(size_of_groups)

plt.show()

22. 树图

树图主要用来可视化树形数据结构，是一种特殊的层次类型，具有唯一的根节点，左子树，和右子树。

import pandas as pd

from matplotlib import pyplot as plt

from scipy.cluster.hierarchy import dendrogram, linkage

# 读取数据

df = pd.read_csv('mtcars.csv')

df = df.set_index('model')

# 计算每个样本之间的距离

Z = linkage(df, 'ward')

# 绘图

dendrogram(Z, leaf_rotation=90, leaf_font_size=8, labels=df.index)

# 显示

plt.show()

25. 面积图

面积图和折线图非常相似，区别在于和x坐标轴间是否被颜色填充。

import matplotlib.pyplot as plt

# 创建数据

x = range(1, 6)

y = [1, 4, 6, 8, 4]

# 生成图表

plt.fill_between(x, y)

plt.show()

使用Matplotlib的fill_between()进行绘制，结果如下。

b5d3cdba052058d381dcdcd7c28cb680.png

26. 堆叠面积图

堆叠面积图表示若干个数值变量的数值演变。

每个显示在彼此的顶部，易于读取总数，但较难准确读取每个的值。

import matplotlib.pyplot as plt

# 创建数据

x = range(1, 6)

y1 = [1, 4, 6, 8, 9]

y2 = [2, 2, 7, 10, 12]

y3 = [2, 8, 5, 10, 6]

# 生成图表

plt.stackplot(x, y1, y2, y3, labels=['A', 'B', 'C'])

plt.legend(loc='upper left')

plt.show()

27. 河流图

河流图是一种特殊的流图, 它主要用来表示事件或主题等在一段时间内的变化。

围绕着中心轴显示，且边缘是圆形的，从而形成流动的形状。

import matplotlib.pyplot as plt

import numpy as np

from scipy import stats

# 添加数据

x = np.arange(1990, 2020)

y = [np.random.randint(0, 5, size=30) for _ in range(5)]

def gaussian_smooth(x, y, grid, sd):
    """Smooth the series ``y`` sampled at ``x`` onto ``grid`` using Gaussian kernels.

    A normal pdf with standard deviation ``sd`` is centred on every point of
    ``x`` and evaluated at every point of ``grid``. Each kernel is normalised
    so that it sums to 1 over the grid, scaled by the matching ``y`` value,
    and the scaled kernels are added together.

    Args:
        x: 1-D sequence of sample positions (kernel centres).
        y: 1-D sequence of sample values, same length as ``x``.
        grid: 1-D array of positions at which the smoothed curve is evaluated.
        sd: Standard deviation of the Gaussian kernels.

    Returns:
        1-D array with one smoothed value per point of ``grid``.
    """
    # weights[i, j] = pdf of the kernel centred on x[j], evaluated at grid[i]
    weights = np.transpose([stats.norm.pdf(grid, m, sd) for m in x])
    # Normalise each kernel (column) so that its values over the grid sum to 1
    weights = weights / weights.sum(0)
    # Weight every kernel by its y value and sum the kernels at each grid point
    return (weights * y).sum(1)

# 自定义颜色

COLORS = ["#D0D1E6", "#A6BDDB", "#74A9CF", "#2B8CBE", "#045A8D"]

# 创建画布

fig, ax = plt.subplots(figsize=(10, 7))

# 生成图表

grid = np.linspace(1985, 2025, num=500)

y_smoothed = [gaussian_smooth(x, y_, grid, 1) for y_ in y]

ax.stackplot(grid, y_smoothed, colors=COLORS, baseline="sym")

# 显示

plt.show()

28. 时间序列图

时间序列图是指能够展示数值演变的所有图表。

比如折线图、柱状图、面积图等等。

import numpy as np

import seaborn as sns

import pandas as pd

import matplotlib.pyplot as plt

# Create the data: 16 countries x 10 years, one random value per (country, year)
my_count = [
    "France", "Australia", "Japan", "USA", "Germany", "Congo", "China", "England",
    "Spain", "Greece", "Marocco", "South Africa", "Indonesia", "Peru", "Chili", "Brazil",
]
df = pd.DataFrame({
    "country": np.repeat(my_count, 10),
    "years": list(range(2000, 2010)) * 16,
    "value": np.random.rand(160),
})

# Create the grid: one facet per country, wrapping to a new row every 4 facets
g = sns.FacetGrid(df, col='country', hue='country', col_wrap=4)

# Draw the line for each country
g.map(plt.plot, 'years', 'value')

# Shade the area under each line
g.map(plt.fill_between, 'years', 'value', alpha=0.2)

# Title each facet with its country name
g.set_titles("{col_name}")

# Overall title; leave room for it above the facets.
# The Text object returned by suptitle is deliberately NOT assigned to `g`:
# rebinding `g` would make any later FacetGrid call on it raise AttributeError.
plt.subplots_adjust(top=0.92)
g.fig.suptitle('Evolution of the value of stuff in 16 countries')

# Show the graph
plt.show()

dd的博客

关注

5
点赞
踩
8

收藏

觉得还不错? 一键收藏
0
评论
分布式比赛数据可视化笔记

data['等级'] = pd.cut(data['总价'], [0, 50, 65, 80, 95, 110, 125, 140, 155, 170, float('inf')], labels=list(range(1, 11)))
复制链接

扫一扫