1. Marsilea简介
Marsilea是一个Python库,用于以声明的方式创建可组合的可视化。它构建在Matplotlib之上,并提供了一个高级API,可以将不同的可视化效果组合在一起。
2. 安装基础Python库
pip install matplotlib
pip install scikit-learn
pip install marsilea
pip install oncoprinter
3. Marsilea可视化单细胞RNA-seq数据
Marsilea可视化教程: https://marsilea.readthedocs.io/en/stable/examples/index.html
3.1 组合热图
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import marsilea as ma
import marsilea.plotter as mp
from sklearn.preprocessing import normalize
# 加载pbmc3k数据
pbmc3k = ma.load_data("pbmc3k")
exp = pbmc3k["exp"]
pct_cells = pbmc3k["pct_cells"]
count = pbmc3k["count"]
matrix = normalize(exp.to_numpy(), axis=0)
cell_cat = [
"Lymphoid",
"Myeloid",
"Lymphoid",
"Lymphoid",
"Lymphoid",
"Myeloid",
"Myeloid",
"Myeloid",
]
cell_names = [
"CD4 T",
"CD14\nMonocytes",
"B",
"CD8 T",
"NK",
"FCGR3A\nMonocytes",
"Dendritic",
"Megakaryocytes",
]
# Make plots
cells_proportion = mp.SizedMesh(
pct_cells,
size_norm=Normalize(vmin=0, vmax=100),
color="none",
edgecolor="#6E75A4",
linewidth=2,
sizes=(1, 600),
size_legend_kws=dict(title="% of cells", show_at=[0.3, 0.5, 0.8, 1]),
)
mark_high = mp.MarkerMesh(matrix > 0.7, color="#DB4D6D", label="High")
cell_count = mp.Numbers(count["Value"], color="#fac858", label="Cell Count")
cell_exp = mp.Violin(
exp, label="Expression", linewidth=0, color="#ee6666", density_norm="count"
)
cell_types = mp.Labels(cell_names, align="center")
gene_names = mp.Labels(exp.columns)
# Group plots together
h = ma.Heatmap(
matrix, cmap="Greens", label="Normalized\nExpression", width=4.5, height=5.5
)
h.add_layer(cells_proportion)
h.add_layer(mark_high)
h.add_right(cell_count, pad=0.1, size=0.7)
h.add_top(cell_exp, pad=0.1, size=0.75, name="exp")
h.add_left(cell_types)
h.add_bottom(gene_names)
h.hsplit(labels=cell_cat, order=["Lymphoid", "Myeloid"])
h.add_left(mp.Chunk(["Lymphoid", "Myeloid"], ["#33A6B8", "#B481BB"]), pad=0.05)
h.add_dendrogram("left", colors=["#33A6B8", "#B481BB"])
h.add_dendrogram("bottom")
h.add_legends("right", align_stacks="center", align_legends="top", pad=0.2)
h.set_margin(0.2)
h.render()
# h.get_ax("exp").set_yscale("symlog")
3.2 乳腺癌基因突变与OncoPrint
import matplotlib.pyplot as plt
import marsilea as ma
import marsilea.plotter as mp
from oncoprinter import OncoPrint
# 加载数据
onco_data = ma.load_data("oncoprint")
cna = onco_data["cna"]
mrna_exp = onco_data["mrna_exp"]
methyl_exp = onco_data["methyl_exp"]
clinical = onco_data["clinical"]
# mRNA表达可视化
h = ma.Heatmap(
mrna_exp,
cmap="gist_heat_r",
height=0.9,
width=5,
cbar_kws=dict(orientation="horizontal", title="mRNA Expression"),
)
h.add_title(top="mRNA expression Z-SCORE", align="left", fontsize=10)
h.add_left(mp.Labels(mrna_exp.index), pad=0.1)
h.render()
# 甲基化表达可视化
m = ma.Heatmap(
methyl_exp.astype(float),
height=0.6,
width=5,
cmap="summer_r",
cbar_kws=dict(orientation="horizontal", title="Methylation"),
)
m.add_title(top="Methylation", align="left", fontsize=10)
m.add_left(mp.Labels(methyl_exp.index), pad=0.1)
m.render()
# 创建Oncoprint
op = OncoPrint(cna, name="main")
op.render()
# 临床信息
clinical = clinical[op.samples_order]
tumor_type = clinical.loc["Cancer Type Detailed"]
tumor_colors = mp.Colors(tumor_type, label="Tumor Type", label_loc="left")
mut_count = clinical.loc["Mutation Count"]
mut_number = mp.Numbers(mut_count, show_value=False, color="orange")
op.add_bottom(tumor_colors, size=0.2, pad=0.1)
op.add_bottom(mut_number, size=0.2, name="mutation_count", pad=0.1, legend=False)
op.render()
op /= h
op /= m
# 可视化渲染
op.set_margin(0.2)
op.add_legends(box_padding=2, stack_size=4)
op.render()
mut_ax = op.get_ax("main", "mutation_count")
mut_ax.set_axis_off()
mut_ax.text(
0,
0.5,
"Mutation Count",
rotation=0,
ha="right",
va="center",
transform=mut_ax.transAxes,
)
3.3 分段条图
可视化食用油中的脂肪含量。
import marsilea as ma
import marsilea.plotter as mp
import mpl_fontkit as fk
fk.install_fontawesome(verbose=False)
fk.install("Lato", verbose=False)
# 加载数据
oils = ma.load_data("cooking_oils")
red = "#cd442a"
yellow = "#f0bd00"
green = "#7e9437"
gray = "#eee"
mapper = {0: "\uf58a", 1: "\uf11a", 2: "\uf567"}
cmapper = {0: "#609966", 1: "#DC8449", 2: "#F16767"}
flavour = [mapper[i] for i in oils["flavour"].values]
flavour_colors = [cmapper[i] for i in oils["flavour"].values]
fat_content = oils[
["saturated", "polyunsaturated (omega 3 & 6)", "monounsaturated", "other fat"]
]
# 可视化数据
fat_stack_bar = mp.StackBar(
fat_content.T * 100,
colors=[red, yellow, green, gray],
width=0.8,
orient="h",
label="Fat Content (%)",
legend_kws={"ncol": 2, "fontsize": 10},
)
fmt = lambda x: f"{x:.1f}" if x > 0 else ""
trans_fat_bar = mp.Numbers(
oils["trans fat"] * 100,
fmt=fmt,
color="#3A98B9",
label="Trans Fat (%)",
)
flavour_emoji = mp.Labels(
flavour, fontfamily="Font Awesome 6 Free", text_props={"color": flavour_colors}
)
oil_names = mp.Labels(oils.index.str.capitalize())
fmt = lambda x: f"{int(x)}" if x > 0 else ""
omege_bar = ma.plotter.CenterBar(
(oils[["omega 3", "omega 6"]] * 100).astype(int),
names=["Omega 3 (%)", "Omega 6 (%)"],
colors=["#7DB9B6", "#F5E9CF"],
fmt=fmt,
show_value=True,
)
conditions_text = [
"Control",
">230 °C\nDeep-frying",
"200-229 °C\nStir-frying",
"150-199 °C\nLight saute",
"<150 °C\nDressings",
]
colors = ["#e5e7eb", "#c2410c", "#fb923c", "#fca5a5", "#fecaca"]
conditions = ma.plotter.Chunk(conditions_text, colors, rotation=0, padding=10)
cb = ma.ClusterBoard(fat_content.to_numpy(), height=10)
cb.add_layer(fat_stack_bar)
cb.add_left(trans_fat_bar, pad=0.2, name="trans fat")
cb.add_right(flavour_emoji)
cb.add_right(oil_names, pad=0.1)
cb.add_right(omege_bar, size=2, pad=0.2)
order = [
"Control",
">230 °C (Deep-frying)",
"200-229 °C (Stir-frying)",
"150-199 °C (Light saute)",
"<150 °C (Dressings)",
]
cb.hsplit(labels=oils["cooking conditions"], order=order)
cb.add_left(conditions, pad=0.1)
cb.add_dendrogram(
"left", add_meta=False, colors=colors, linewidth=1.5, size=0.5, pad=0.02
)
cb.add_title(top="Fat in Cooking Oils", fontsize=16)
cb.add_legends("bottom", pad=0.3)
cb.render()
axes = cb.get_ax("trans fat")
for ax in axes:
ax.set_xlim(4.2, 0)
3.4 序列比对图
from collections import Counter
import numpy as np
import pandas as pd
import marsilea as ma
import matplotlib as mpl
# 加载数据
seq = ma.load_data("seq_align")
seq = seq.iloc[:, 130:175]
# 计算氨基酸高度
collect = []
for _, col in seq.items():
collect.append(Counter(col))
hm = pd.DataFrame(collect)
del hm["-"]
hm = hm.T.fillna(0.0)
hm.columns = seq.columns
hm /= hm.sum(axis=0)
n = hm.shape[1]
s = 20
En = (1 / np.log(2)) * ((s - 1) / (2 * n))
heights = []
for _, col in hm.items():
H = -(np.log2(col) * col).sum()
R = np.log2(20) - (H + En)
heights.append(col * R)
logo = pd.DataFrame(heights).T
# 颜色
color_encode = {
"A": "#f76ab4",
"C": "#ff7f00",
"D": "#e41a1c",
"E": "#e41a1c",
"F": "#84380b",
"G": "#f76ab4",
"H": "#3c58e5",
"I": "#12ab0d",
"K": "#3c58e5",
"L": "#12ab0d",
"M": "#12ab0d",
"N": "#972aa8",
"P": "#12ab0d",
"Q": "#972aa8",
"R": "#3c58e5",
"S": "#ff7f00",
"T": "#ff7f00",
"V": "#12ab0d",
"W": "#84380b",
"Y": "#84380b",
"-": "white",
}
max_aa = []
freq = []
for _, col in hm.items():
ix = np.argmax(col)
max_aa.append(hm.index[ix])
freq.append(col[ix])
position = []
mock_ticks = []
for i in seq.columns:
if int(i) % 10 == 0:
position.append(i)
mock_ticks.append("^")
else:
position.append("")
mock_ticks.append("")
# 绘图
height = 5
width = height * seq.shape[1] / seq.shape[0]
ch = ma.CatHeatmap(seq.to_numpy(), palette=color_encode, height=height, width=width)
ch.add_layer(ma.plotter.TextMesh(seq.to_numpy()))
ch.add_top(ma.plotter.SeqLogo(logo, color_encode=color_encode), pad=0.1, size=2)
ch.add_left(ma.plotter.Labels(seq.index), pad=0.1)
ch.add_bottom(ma.plotter.Labels(mock_ticks, rotation=0), pad=0.1)
ch.add_bottom(ma.plotter.Labels(position, rotation=0))
ch.add_bottom(
ma.plotter.Numbers(freq, width=0.9, color="#FFB11B", show_value=False),
name="freq_bar",
size=2,
)
ch.add_bottom(ma.plotter.Labels(max_aa, rotation=0), pad=0.1)
ch.render()
ch.get_ax("freq_bar").set_axis_off()