生物分子相互作用的随机模拟-CSDN博客

本文链接：https://blog.csdn.net/qq_42788199/article/details/147983053
生物分子相互作用的随机模拟：模拟生物分子（如蛋白质和配体）之间的随机结合和解离过程。使用 Python 的随机数生成功能，根据分子的结合常数和解离常数，模拟分子在溶液中的相互作用。统计不同时间点结合的分子数量和游离的分子数量，分析分子浓度、温度等因素对相互作用的影响。通过模拟多次实验，评估相互作用的可靠性和变异性。
import numpy as np
import matplotlib.pyplot as plt
from typing import List, Tuple, Dict
import pandas as pd
import os
from matplotlib.colors import LinearSegmentedColormap

# 添加字体设置以确保中文正常显示
plt.rcParams["font.family"] = ["SimHei", "WenQuanYi Micro Hei", "Heiti TC", "sans-serif"]
plt.rcParams["axes.unicode_minus"] = False  # 确保负号正确显示


class BiomolecularSimulator:
    """生物分子相互作用随机模拟类"""

    def __init__(self, protein_conc: float, ligand_conc: float,
                 k_on: float, k_off: float, volume: float = 1.0,
                 temperature: float = 298.15, seed: int = None):
        """
        初始化生物分子模拟参数

        参数:
            protein_conc: 蛋白质初始浓度 (μM)
            ligand_conc: 配体初始浓度 (μM)
            k_on: 结合速率常数 (μM^-1·s^-1)
            k_off: 解离速率常数 (s^-1)
            volume: 反应体积 (L)
            temperature: 温度 (K)
            seed: 随机数生成器种子
        """
        # 转换浓度为分子数
        avogadro = 6.022e23  # 阿伏伽德罗常数
        self.initial_protein = int(protein_conc * 1e-6 * avogadro * volume)
        self.initial_ligand = int(ligand_conc * 1e-6 * avogadro * volume)
        self.k_on = k_on * 1e-6  # 转换为分子数单位
        self.k_off = k_off
        self.volume = volume
        self.temperature = temperature
        self.boltzmann = 1.38e-23  # 玻尔兹曼常数 (J/K)

        # 初始化随机数生成器
        self.rng = np.random.default_rng(seed)

        # 模拟结果存储
        self.simulation_results = []

    def calculate_reaction_rates(self, protein: int, ligand: int, complex: int) -> Tuple[float, float]:
        """
        计算结合和解离反应的速率

        参数:
            protein: 当前蛋白质分子数
            ligand: 当前配体分子数
            complex: 当前复合物分子数

        返回:
            结合速率和解离速率
        """
        binding_rate = self.k_on * protein * ligand
        dissociation_rate = self.k_off * complex
        return binding_rate, dissociation_rate

    def run_simulation(self, max_time: float, record_interval: float = 0.1) -> Dict:
        """
        运行 Gillespie 随机模拟

        参数:
            max_time: 最大模拟时间 (s)
            record_interval: 记录数据的时间间隔 (s)

        返回:
            包含模拟结果的字典
        """
        # 初始化分子数和时间
        protein = self.initial_protein
        ligand = self.initial_ligand
        complex = 0
        time = 0.0

        # 记录初始状态
        next_record_time = 0.0
        time_points = [time]
        protein_counts = [protein]
        ligand_counts = [ligand]
        complex_counts = [complex]

        # 主模拟循环
        while time < max_time:
            # 计算反应速率
            binding_rate, dissociation_rate = self.calculate_reaction_rates(protein, ligand, complex)
            total_rate = binding_rate + dissociation_rate

            # 如果没有可能的反应，退出循环
            if total_rate <= 0:
                break

            # 确定下一个反应的时间
            dt = self.rng.exponential(1.0 / total_rate)
            time += dt

            # 记录数据
            while time >= next_record_time and next_record_time <= max_time:
                time_points.append(next_record_time)
                protein_counts.append(protein)
                ligand_counts.append(ligand)
                complex_counts.append(complex)
                next_record_time += record_interval

            # 确定发生的反应类型
            if self.rng.random() < binding_rate / total_rate:
                # 发生结合反应
                protein -= 1
                ligand -= 1
                complex += 1
            else:
                # 发生解离反应
                protein += 1
                ligand += 1
                complex -= 1

        # 确保记录到最大时间点
        if time_points[-1] < max_time:
            time_points.append(max_time)
            protein_counts.append(protein)
            ligand_counts.append(ligand)
            complex_counts.append(complex)

        # 转换为浓度 (μM)
        avogadro = 6.022e23
        protein_conc = np.array(protein_counts) / (avogadro * self.volume) * 1e6
        ligand_conc = np.array(ligand_counts) / (avogadro * self.volume) * 1e6
        complex_conc = np.array(complex_counts) / (avogadro * self.volume) * 1e6

        # 计算结合分数
        if self.initial_protein > 0:
            binding_fraction = complex_conc / (protein_conc + complex_conc)
        else:
            binding_fraction = np.zeros_like(complex_conc)

        # 存储结果
        result = {
            'time': np.array(time_points),
            'protein_concentration': protein_conc,
            'ligand_concentration': ligand_conc,
            'complex_concentration': complex_conc,
            'binding_fraction': binding_fraction
        }

        self.simulation_results.append(result)
        return result

    def run_multiple_simulations(self, num_simulations: int, max_time: float,
                                 record_interval: float = 0.1) -> List[Dict]:
        """
        运行多次独立模拟

        参数:
            num_simulations: 模拟次数
            max_time: 最大模拟时间 (s)
            record_interval: 记录数据的时间间隔 (s)

        返回:
            包含所有模拟结果的列表
        """
        all_results = []
        for i in range(num_simulations):
            result = self.run_simulation(max_time, record_interval)
            all_results.append(result)
            print(f"完成模拟 {i + 1}/{num_simulations}")
        return all_results

    def analyze_concentration_effect(self, protein_concentrations: List[float],
                                     ligand_concentration: float, num_simulations: int = 1,
                                     max_time: float = 10.0) -> Dict:
        """
        分析蛋白质浓度对结合的影响

        参数:
            protein_concentrations: 蛋白质浓度列表 (μM)
            ligand_concentration: 配体浓度 (μM)
            num_simulations: 每个浓度的模拟次数
            max_time: 最大模拟时间 (s)

        返回:
            包含不同浓度下结合分数的结果
        """
        results = {}

        for conc in protein_concentrations:
            # 更新蛋白质浓度
            avogadro = 6.022e23
            self.initial_protein = int(conc * 1e-6 * avogadro * self.volume)

            # 运行多次模拟
            conc_results = []
            for _ in range(num_simulations):
                result = self.run_simulation(max_time)
                conc_results.append(result['binding_fraction'][-1])  # 最终结合分数

            # 计算平均值和标准差
            results[conc] = {
                'mean': np.mean(conc_results),
                'std': np.std(conc_results),
                'all_values': conc_results
            }

        # 恢复原始蛋白质浓度
        self.initial_protein = int(self.initial_protein * 1e-6 * avogadro * self.volume)

        return results

    def analyze_temperature_effect(self, temperatures: List[float],
                                   num_simulations: int = 1, max_time: float = 10.0) -> Dict:
        """
        分析温度对结合的影响

        参数:
            temperatures: 温度列表 (K)
            num_simulations: 每个温度的模拟次数
            max_time: 最大模拟时间 (s)

        返回:
            包含不同温度下结合分数的结果
        """
        results = {}

        for temp in temperatures:
            # 更新温度
            self.temperature = temp

            # 运行多次模拟
            temp_results = []
            for _ in range(num_simulations):
                result = self.run_simulation(max_time)
                temp_results.append(result['binding_fraction'][-1])  # 最终结合分数

            # 计算平均值和标准差
            results[temp] = {
                'mean': np.mean(temp_results),
                'std': np.std(temp_results),
                'all_values': temp_results
            }

        # 恢复原始温度
        self.temperature = 298.15

        return results

    def calculate_equilibrium_constant(self, results: Dict) -> float:
        """
        从模拟结果计算平衡常数

        参数:
            results: 单次模拟的结果

        返回:
            平衡常数 Kd (μM)
        """
        # 获取最终浓度
        final_protein = results['protein_concentration'][-1]
        final_ligand = results['ligand_concentration'][-1]
        final_complex = results['complex_concentration'][-1]

        # 计算 Kd = [P][L]/[PL]
        if final_complex > 0:
            kd = (final_protein * final_ligand) / final_complex
        else:
            kd = float('inf')

        return kd


class SimulationAnalyzer:
    """模拟结果分析和可视化类"""

    @staticmethod
    def plot_concentration_time_course(results: Dict, title: str = "分子浓度随时间变化",
                                       save_path: str = None):
        """
        绘制浓度随时间变化的曲线

        参数:
            results: 模拟结果字典
            title: 图表标题
            save_path: 保存图像的路径，为None则不保存
        """
        time = results['time']
        protein = results['protein_concentration']
        ligand = results['ligand_concentration']
        complex = results['complex_concentration']

        plt.figure(figsize=(10, 6))
        plt.plot(time, protein, 'b-', label='蛋白质')
        plt.plot(time, ligand, 'r-', label='配体')
        plt.plot(time, complex, 'g-', label='复合物')

        plt.xlabel('时间 (s)')
        plt.ylabel('浓度 (μM)')
        plt.title(title)
        plt.legend()
        plt.grid(True)

        if save_path:
            plt.savefig(save_path)
            plt.close()
        else:
            plt.show()

    @staticmethod
    def plot_binding_fraction(results: Dict, title: str = "结合分数随时间变化",
                              save_path: str = None):
        """
        绘制结合分数随时间变化的曲线

        参数:
            results: 模拟结果字典
            title: 图表标题
            save_path: 保存图像的路径，为None则不保存
        """
        time = results['time']
        binding_fraction = results['binding_fraction']

        plt.figure(figsize=(10, 6))
        plt.plot(time, binding_fraction, 'm-')

        plt.xlabel('时间 (s)')
        plt.ylabel('结合分数')
        plt.title(title)
        plt.grid(True)

        if save_path:
            plt.savefig(save_path)
            plt.close()
        else:
            plt.show()

    @staticmethod
    def plot_multiple_simulations(all_results: List[Dict], title: str = "多次模拟结果比较",
                                  save_path: str = None):
        """
        绘制多次模拟的结合分数曲线

        参数:
            all_results: 所有模拟结果的列表
            title: 图表标题
            save_path: 保存图像的路径，为None则不保存
        """
        plt.figure(figsize=(10, 6))

        # 创建自定义颜色映射
        colors = plt.cm.viridis(np.linspace(0, 1, len(all_results)))

        for i, result in enumerate(all_results):
            time = result['time']
            binding_fraction = result['binding_fraction']
            plt.plot(time, binding_fraction, color=colors[i], alpha=0.7,
                     label=f'模拟 {i + 1}')

        plt.xlabel('时间 (s)')
        plt.ylabel('结合分数')
        plt.title(title)
        plt.grid(True)
        plt.legend()

        if save_path:
            plt.savefig(save_path)
            plt.close()
        else:
            plt.show()

    @staticmethod
    def plot_concentration_effect(results: Dict, title: str = "蛋白质浓度对结合的影响",
                                  save_path: str = None):
        """
        绘制蛋白质浓度对结合分数的影响

        参数:
            results: 浓度分析结果
            title: 图表标题
            save_path: 保存图像的路径，为None则不保存
        """
        concentrations = sorted(results.keys())
        mean_values = [results[conc]['mean'] for conc in concentrations]
        std_values = [results[conc]['std'] for conc in concentrations]

        plt.figure(figsize=(10, 6))
        plt.errorbar(concentrations, mean_values, yerr=std_values, fmt='o-', capsize=5)

        plt.xlabel('蛋白质浓度 (μM)')
        plt.ylabel('平衡结合分数')
        plt.title(title)
        plt.grid(True)

        if save_path:
            plt.savefig(save_path)
            plt.close()
        else:
            plt.show()

    @staticmethod
    def plot_temperature_effect(results: Dict, title: str = "温度对结合的影响",
                                save_path: str = None):
        """
        绘制温度对结合分数的影响

        参数:
            results: 温度分析结果
            title: 图表标题
            save_path: 保存图像的路径，为None则不保存
        """
        temperatures = sorted(results.keys())
        mean_values = [results[temp]['mean'] for temp in temperatures]
        std_values = [results[temp]['std'] for temp in temperatures]

        plt.figure(figsize=(10, 6))
        plt.errorbar(temperatures, mean_values, yerr=std_values, fmt='o-', capsize=5)

        plt.xlabel('温度 (K)')
        plt.ylabel('平衡结合分数')
        plt.title(title)
        plt.grid(True)

        if save_path:
            plt.savefig(save_path)
            plt.close()
        else:
            plt.show()


def run_simulation_example():
    """运行模拟示例"""
    # 创建输出目录
    if not os.path.exists('simulation_results'):
        os.makedirs('simulation_results')

    # 初始化模拟器 (典型的蛋白质-配体相互作用参数)
    simulator = BiomolecularSimulator(
        protein_conc=1.0,  # 蛋白质浓度 (μM)
        ligand_conc=5.0,  # 配体浓度 (μM)
        k_on=1.0e3,  # 结合速率常数 (μM^-1·s^-1)
        k_off=0.1,  # 解离速率常数 (s^-1)
        volume=1.0e-15,  # 体积 (L, 相当于1飞升)
        temperature=298.15,  # 温度 (K)
        seed=42  # 随机数种子，保证结果可重复
    )

    # 1. 运行单次模拟
    print("运行单次模拟...")
    single_result = simulator.run_simulation(max_time=10.0, record_interval=0.01)

    # 计算平衡常数
    kd = simulator.calculate_equilibrium_constant(single_result)
    print(f"计算得到的平衡常数 Kd = {kd:.4f} μM")
    print(f"理论 Kd = {simulator.k_off / simulator.k_on * 1e6:.4f} μM")

    # 可视化单次模拟结果
    analyzer = SimulationAnalyzer()
    analyzer.plot_concentration_time_course(
        single_result,
        title="蛋白质-配体结合的浓度变化",
        save_path="simulation_results/concentration_time_course.png"
    )
    analyzer.plot_binding_fraction(
        single_result,
        title="蛋白质-配体结合分数变化",
        save_path="simulation_results/binding_fraction.png"
    )

    # 2. 运行多次模拟，评估变异性
    print("\n运行多次模拟以评估变异性...")
    num_simulations = 10
    all_results = simulator.run_multiple_simulations(
        num_simulations=num_simulations,
        max_time=10.0,
        record_interval=0.1
    )

    # 计算平均平衡常数
    kd_values = [simulator.calculate_equilibrium_constant(result) for result in all_results]
    print(f"多次模拟得到的平均 Kd = {np.mean(kd_values):.4f} ± {np.std(kd_values):.4f} μM")

    # 可视化多次模拟结果
    analyzer.plot_multiple_simulations(
        all_results,
        title=f"{num_simulations}次独立模拟的结合分数比较",
        save_path="simulation_results/multiple_simulations.png"
    )

    # 3. 分析蛋白质浓度的影响
    print("\n分析蛋白质浓度的影响...")
    protein_concentrations = [0.1, 0.5, 1.0, 2.0, 5.0, 10.0]  # μM
    conc_results = simulator.analyze_concentration_effect(
        protein_concentrations=protein_concentrations,
        ligand_concentration=5.0,  # 保持配体浓度恒定
        num_simulations=3,
        max_time=10.0
    )

    # 可视化浓度影响
    analyzer.plot_concentration_effect(
        conc_results,
        title="蛋白质浓度对平衡结合分数的影响",
        save_path="simulation_results/concentration_effect.png"
    )

    # 4. 分析温度的影响
    print("\n分析温度的影响...")
    temperatures = [273.15, 283.15, 293.15, 303.15, 313.15]  # K (0°C 到 40°C)
    temp_results = simulator.analyze_temperature_effect(
        temperatures=temperatures,
        num_simulations=3,
        max_time=10.0
    )

    # 可视化温度影响
    analyzer.plot_temperature_effect(
        temp_results,
        title="温度对平衡结合分数的影响",
        save_path="simulation_results/temperature_effect.png"
    )

    print("\n模拟完成！结果已保存到 simulation_results 目录")


if __name__ == "__main__":
    run_simulation_example()