# Import libraries
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy.cluster import hierarchy
# Import the mtcars dataset from the web + keep only numeric variables
url = 'https://python-graph-gallery.com/wp-content/uploads/mtcars.csv'
df = pd.read_csv(url)
# Use the car names as row labels when that column is present, so that
# `labels=df.index` below shows names rather than row numbers.
# NOTE(review): assumes the CSV's text column is called 'model' - confirm.
if 'model' in df.columns:
    df = df.set_index('model')
# Actually drop the non-numeric columns: the linkage step below works on a
# numeric observation matrix.
df = df.select_dtypes(include='number')
# Notebook-style echo of the table; has no effect when run as a script.
df

# Calculate the distance between each sample (Ward linkage). Z has to exist
# before the dendrogram call further down can use it.
Z = hierarchy.linkage(df, 'ward')

# Make the dendrogram
plt.title('Hierarchical Clustering Dendrogram')
plt.xlabel('sample index')
plt.ylabel('distance (Ward)')
# Draw the cluster tree. `labels` sets the text under each leaf and
# `leaf_rotation` rotates that text. Full parameter list:
# https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.cluster.hierarchy.dendrogram.html
# The trailing semicolon presumably hides the echoed return value in a notebook.
hierarchy.dendrogram(Z, labels=df.index, leaf_rotation=90);
# 3. 自定义树形图 (customised dendrogram)
#    - 叶标签 (leaf label)
#    - 聚类簇数 (number of clusters)
#    - 颜色 (color)
#    - 截减 (truncate)
#    - 方向 (orientation)
# Leaf labels
# Compute the Ward linkage between the samples in df.
Z = hierarchy.linkage(df, method='ward')
# Plot with custom leaves: `labels` supplies the leaf text, `leaf_rotation`
# rotates it and `leaf_font_size` sets its font size.
leaf_style = dict(labels=df.index, leaf_rotation=90, leaf_font_size=8)
hierarchy.dendrogram(Z, **leaf_style);
# Number of clusters
# Compute the Ward linkage between the samples in df.
Z = hierarchy.linkage(df, method='ward')
# Control the number of clusters shown in the plot and add a horizontal line.
# `color_threshold` is the colouring cut-off: links below it are coloured
# per cluster.
cut_height = 240
hierarchy.dendrogram(Z, color_threshold=cut_height)
# Horizontal line at the cut: y = position on the y axis, c = colour,
# lw = line width, linestyle = line pattern.
plt.axhline(y=cut_height, linestyle='dashed', lw=1, c='grey');
# Colour
# Compute the Ward linkage between the samples in df.
Z = hierarchy.linkage(df, method='ward')
# Set the colours used for the clusters.
cluster_colours = ['#b30000', '#996600', '#b30086']
hierarchy.set_link_color_palette(cluster_colours)
# Make the dendrogram; `above_threshold_color` is the colour given to the
# links that lie above `color_threshold`.
hierarchy.dendrogram(Z, above_threshold_color='grey', color_threshold=240)
# Add a horizontal line at the threshold.
plt.axhline(y=240, linestyle='dashed', lw=1, c='grey');
# Truncate
# A dendrogram is hard to read when the original observation matrix is large;
# truncation condenses it. Available modes:
#   1. None  - no truncation is performed
#   2. lastp - `p` sets the number of leaf nodes kept at the bottom of the plot
#   3. level - `p` sets the maximum number of levels drawn
# Compute the Ward linkage between the samples in df.
Z = hierarchy.linkage(df, method='ward')
# Method 1: lastp - there will be 4 leaves at the bottom of the plot.
hierarchy.dendrogram(Z, p=4, truncate_mode='lastp');
# Method 2: level - no more than ``p`` levels of the dendrogram tree are displayed.
hierarchy.dendrogram(Z, p=2, truncate_mode='level');
# Orientation
# Compute the Ward linkage between the samples in df.
Z = hierarchy.linkage(df, method='ward')
# `orientation` sets which way the tree faces; it can be "top", "left",
# "bottom" or "right", and defaults to "top".
# First plot: orientation "right".
hierarchy.dendrogram(Z, labels=df.index, orientation="right");
# Second plot: orientation "bottom".
hierarchy.dendrogram(Z, labels=df.index, orientation="bottom");
# 4. 彩色树形图标签 (color dendrogram labels)
# Calculate the distance between each sample
Z = hierarchy.linkage(df, 'ward')

# Draw the dendrogram and keep its return value: the 'leaves' entry gives,
# for each leaf position along the axis, the row of df that is drawn there.
dendro = hierarchy.dendrogram(
    Z,
    labels=df.index,
    leaf_rotation=0,
    orientation="left",
    color_threshold=240,
    above_threshold_color='grey',
)

# Create a colour palette with 3 colours for the 3 'cyl' possibilities.
# plt.get_cmap is used because plt.cm.get_cmap is no longer available in
# recent matplotlib releases.
my_palette = plt.get_cmap("Accent", 3)

# Transform the 'cyl' column into a categorical variable so that each level
# maps to one integer code, i.e. one colour.
df['cyl'] = pd.Categorical(df['cyl'])
# Colour code of every car, in df row order.
my_color = df['cyl'].cat.codes

# Apply the right colour to each label.
ax = plt.gca()
# With orientation="left" the leaf labels are the y-axis tick labels.
xlbls = ax.get_ymajorticklabels()
# Tick labels follow the dendrogram's leaf order, not df's row order, so each
# label is matched with its own row through dendro['leaves'].
for lbl, row in zip(xlbls, dendro['leaves']):
    lbl.set_color(my_palette(int(my_color.iloc[row])))