Marsilea可视化单细胞RNA-seq和空间转录组数据

最新推荐文章于 2025-06-13 10:25:00 发布

生信与基因组学

最新推荐文章于 2025-06-13 10:25:00 发布

阅读量510

点赞数 11

分类专栏：单细胞RNA-seq分析文章标签： python 数据挖掘数据分析

本文链接：https://blog.csdn.net/LittleComputerRobot/article/details/141052838

版权

单细胞RNA-seq分析专栏收录该内容

11 篇文章

订阅专栏

1. Marsilea简介

Marsilea是一个Python库，用于以声明的方式创建可组合的可视化。它构建在Matplotlib之上，并提供了一个高级API，可以将不同的可视化效果组合在一起。

2. 安装基础Python库

pip install matplotlib 
pip install scikit-learn
pip install marsilea 
pip install oncoprinter

3. Marsilea可视化单细胞RNA-seq数据

Marsilea可视化教程： https://marsilea.readthedocs.io/en/stable/examples/index.html

可视化教程

3.1 组合热图

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import marsilea as ma
import marsilea.plotter as mp
from sklearn.preprocessing import normalize

# 加载pbmc3k数据
pbmc3k = ma.load_data("pbmc3k")
exp = pbmc3k["exp"]
pct_cells = pbmc3k["pct_cells"]
count = pbmc3k["count"]

matrix = normalize(exp.to_numpy(), axis=0)

cell_cat = [
    "Lymphoid",
    "Myeloid",
    "Lymphoid",
    "Lymphoid",
    "Lymphoid",
    "Myeloid",
    "Myeloid",
    "Myeloid",
]
cell_names = [
    "CD4 T",
    "CD14\nMonocytes",
    "B",
    "CD8 T",
    "NK",
    "FCGR3A\nMonocytes",
    "Dendritic",
    "Megakaryocytes",
]

# Make plots
cells_proportion = mp.SizedMesh(
    pct_cells,
    size_norm=Normalize(vmin=0, vmax=100),
    color="none",
    edgecolor="#6E75A4",
    linewidth=2,
    sizes=(1, 600),
    size_legend_kws=dict(title="% of cells", show_at=[0.3, 0.5, 0.8, 1]),
)
mark_high = mp.MarkerMesh(matrix > 0.7, color="#DB4D6D", label="High")
cell_count = mp.Numbers(count["Value"], color="#fac858", label="Cell Count")
cell_exp = mp.Violin(
    exp, label="Expression", linewidth=0, color="#ee6666", density_norm="count"
)
cell_types = mp.Labels(cell_names, align="center")
gene_names = mp.Labels(exp.columns)

# Group plots together
h = ma.Heatmap(
    matrix, cmap="Greens", label="Normalized\nExpression", width=4.5, height=5.5
)
h.add_layer(cells_proportion)
h.add_layer(mark_high)
h.add_right(cell_count, pad=0.1, size=0.7)
h.add_top(cell_exp, pad=0.1, size=0.75, name="exp")
h.add_left(cell_types)
h.add_bottom(gene_names)

h.hsplit(labels=cell_cat, order=["Lymphoid", "Myeloid"])
h.add_left(mp.Chunk(["Lymphoid", "Myeloid"], ["#33A6B8", "#B481BB"]), pad=0.05)
h.add_dendrogram("left", colors=["#33A6B8", "#B481BB"])
h.add_dendrogram("bottom")
h.add_legends("right", align_stacks="center", align_legends="top", pad=0.2)
h.set_margin(0.2)
h.render()

# h.get_ax("exp").set_yscale("symlog")

在这里插入图片描述

3.2 乳腺癌基因突变与OncoPrint

import matplotlib.pyplot as plt
import marsilea as ma
import marsilea.plotter as mp
from oncoprinter import OncoPrint

# 加载数据
onco_data = ma.load_data("oncoprint")
cna = onco_data["cna"]
mrna_exp = onco_data["mrna_exp"]
methyl_exp = onco_data["methyl_exp"]
clinical = onco_data["clinical"]

# mRNA表达可视化
h = ma.Heatmap(
    mrna_exp,
    cmap="gist_heat_r",
    height=0.9,
    width=5,
    cbar_kws=dict(orientation="horizontal", title="mRNA Expression"),
)
h.add_title(top="mRNA expression Z-SCORE", align="left", fontsize=10)
h.add_left(mp.Labels(mrna_exp.index), pad=0.1)
h.render()

mRNA

# 甲基化表达可视化
m = ma.Heatmap(
    methyl_exp.astype(float),
    height=0.6,
    width=5,
    cmap="summer_r",
    cbar_kws=dict(orientation="horizontal", title="Methylation"),
)
m.add_title(top="Methylation", align="left", fontsize=10)
m.add_left(mp.Labels(methyl_exp.index), pad=0.1)
m.render()

甲基化

# 创建Oncoprint
op = OncoPrint(cna, name="main")
op.render()

Oncoprint

# 临床信息
clinical = clinical[op.samples_order]
tumor_type = clinical.loc["Cancer Type Detailed"]
tumor_colors = mp.Colors(tumor_type, label="Tumor Type", label_loc="left")

mut_count = clinical.loc["Mutation Count"]
mut_number = mp.Numbers(mut_count, show_value=False, color="orange")

op.add_bottom(tumor_colors, size=0.2, pad=0.1)
op.add_bottom(mut_number, size=0.2, name="mutation_count", pad=0.1, legend=False)
op.render()

临床信息

op /= h
op /= m

# 可视化渲染
op.set_margin(0.2)
op.add_legends(box_padding=2, stack_size=4)
op.render()
mut_ax = op.get_ax("main", "mutation_count")
mut_ax.set_axis_off()
mut_ax.text(
    0,
    0.5,
    "Mutation Count",
    rotation=0,
    ha="right",
    va="center",
    transform=mut_ax.transAxes,
)

可视化渲染

3.3 分段条图

可视化食用油中的脂肪含量。

import marsilea as ma
import marsilea.plotter as mp
import mpl_fontkit as fk

fk.install_fontawesome(verbose=False)
fk.install("Lato", verbose=False)

# 加载数据
oils = ma.load_data("cooking_oils")

red = "#cd442a"
yellow = "#f0bd00"
green = "#7e9437"
gray = "#eee"

mapper = {0: "\uf58a", 1: "\uf11a", 2: "\uf567"}
cmapper = {0: "#609966", 1: "#DC8449", 2: "#F16767"}
flavour = [mapper[i] for i in oils["flavour"].values]
flavour_colors = [cmapper[i] for i in oils["flavour"].values]
fat_content = oils[
    ["saturated", "polyunsaturated (omega 3 & 6)", "monounsaturated", "other fat"]
]

# 可视化数据
fat_stack_bar = mp.StackBar(
    fat_content.T * 100,
    colors=[red, yellow, green, gray],
    width=0.8,
    orient="h",
    label="Fat Content (%)",
    legend_kws={"ncol": 2, "fontsize": 10},
)
fmt = lambda x: f"{x:.1f}" if x > 0 else ""
trans_fat_bar = mp.Numbers(
    oils["trans fat"] * 100,
    fmt=fmt,
    color="#3A98B9",
    label="Trans Fat (%)",
)

flavour_emoji = mp.Labels(
    flavour, fontfamily="Font Awesome 6 Free", text_props={"color": flavour_colors}
)

oil_names = mp.Labels(oils.index.str.capitalize())

fmt = lambda x: f"{int(x)}" if x > 0 else ""

omege_bar = ma.plotter.CenterBar(
    (oils[["omega 3", "omega 6"]] * 100).astype(int),
    names=["Omega 3 (%)", "Omega 6 (%)"],
    colors=["#7DB9B6", "#F5E9CF"],
    fmt=fmt,
    show_value=True,
)
conditions_text = [
    "Control",
    ">230 °C\nDeep-frying",
    "200-229 °C\nStir-frying",
    "150-199 °C\nLight saute",
    "<150 °C\nDressings",
]
colors = ["#e5e7eb", "#c2410c", "#fb923c", "#fca5a5", "#fecaca"]
conditions = ma.plotter.Chunk(conditions_text, colors, rotation=0, padding=10)

cb = ma.ClusterBoard(fat_content.to_numpy(), height=10)
cb.add_layer(fat_stack_bar)
cb.add_left(trans_fat_bar, pad=0.2, name="trans fat")
cb.add_right(flavour_emoji)
cb.add_right(oil_names, pad=0.1)
cb.add_right(omege_bar, size=2, pad=0.2)

order = [
    "Control",
    ">230 °C (Deep-frying)",
    "200-229 °C (Stir-frying)",
    "150-199 °C (Light saute)",
    "<150 °C (Dressings)",
]
cb.hsplit(labels=oils["cooking conditions"], order=order)
cb.add_left(conditions, pad=0.1)
cb.add_dendrogram(
    "left", add_meta=False, colors=colors, linewidth=1.5, size=0.5, pad=0.02
)
cb.add_title(top="Fat in Cooking Oils", fontsize=16)
cb.add_legends("bottom", pad=0.3)
cb.render()

axes = cb.get_ax("trans fat")
for ax in axes:
    ax.set_xlim(4.2, 0)

在这里插入图片描述

3.4 序列比对图

from collections import Counter
import numpy as np
import pandas as pd
import marsilea as ma
import matplotlib as mpl

# 加载数据
seq = ma.load_data("seq_align")
seq = seq.iloc[:, 130:175]

# 计算氨基酸高度
collect = []
for _, col in seq.items():
    collect.append(Counter(col))

hm = pd.DataFrame(collect)
del hm["-"]
hm = hm.T.fillna(0.0)
hm.columns = seq.columns
hm /= hm.sum(axis=0)

n = hm.shape[1]
s = 20
En = (1 / np.log(2)) * ((s - 1) / (2 * n))

heights = []
for _, col in hm.items():
    H = -(np.log2(col) * col).sum()
    R = np.log2(20) - (H + En)
    heights.append(col * R)

logo = pd.DataFrame(heights).T

# 颜色
color_encode = {
    "A": "#f76ab4",
    "C": "#ff7f00",
    "D": "#e41a1c",
    "E": "#e41a1c",
    "F": "#84380b",
    "G": "#f76ab4",
    "H": "#3c58e5",
    "I": "#12ab0d",
    "K": "#3c58e5",
    "L": "#12ab0d",
    "M": "#12ab0d",
    "N": "#972aa8",
    "P": "#12ab0d",
    "Q": "#972aa8",
    "R": "#3c58e5",
    "S": "#ff7f00",
    "T": "#ff7f00",
    "V": "#12ab0d",
    "W": "#84380b",
    "Y": "#84380b",
    "-": "white",
}

max_aa = []
freq = []

for _, col in hm.items():
    ix = np.argmax(col)
    max_aa.append(hm.index[ix])
    freq.append(col[ix])

position = []
mock_ticks = []
for i in seq.columns:
    if int(i) % 10 == 0:
        position.append(i)
        mock_ticks.append("^")
    else:
        position.append("")
        mock_ticks.append("")

# 绘图
height = 5
width = height * seq.shape[1] / seq.shape[0]

ch = ma.CatHeatmap(seq.to_numpy(), palette=color_encode, height=height, width=width)
ch.add_layer(ma.plotter.TextMesh(seq.to_numpy()))
ch.add_top(ma.plotter.SeqLogo(logo, color_encode=color_encode), pad=0.1, size=2)
ch.add_left(ma.plotter.Labels(seq.index), pad=0.1)
ch.add_bottom(ma.plotter.Labels(mock_ticks, rotation=0), pad=0.1)
ch.add_bottom(ma.plotter.Labels(position, rotation=0))
ch.add_bottom(
    ma.plotter.Numbers(freq, width=0.9, color="#FFB11B", show_value=False),
    name="freq_bar",
    size=2,
)
ch.add_bottom(ma.plotter.Labels(max_aa, rotation=0), pad=0.1)
ch.render()

ch.get_ax("freq_bar").set_axis_off()