贝叶斯网的两种实现方式(以泰坦尼克号数据集为例):
1.基于评分搜索的贝叶斯网络
2.基于条件独立性测试的贝叶斯网络结构
’
import pandas as pd
from pgmpy.estimators import HillClimbSearch
from pgmpy.models import BayesianModel
from pgmpy.estimators import BicScore
from pgmpy.estimators import BayesianEstimator
import networkx as nx
from matplotlib import pyplot as plt
from pgmpy.inference import VariableElimination
import warnings
from pgmpy.estimators import ConstraintBasedEstimator
import time
from sklearn.metrics import accuracy_score
# Install a blanket "ignore" filter so no warning of any category is shown
# for the rest of the run (this affects every module in the process).
warnings.filterwarnings("ignore")
def score_BN(train):
    """Build a Bayesian network using score-based structure learning.

    The structure is found by ``HillClimbSearch`` scored with ``BicScore``
    (both constructed from ``train``); a fresh ``BayesianModel`` is then
    created from the learned edges and its parameters are fitted on the
    same data with ``BayesianEstimator``.

    Args:
        train: Training data handed unchanged to the pgmpy estimators
            (presumably a pandas DataFrame of discrete columns -- confirm
            against the caller).

    Returns:
        The fitted ``BayesianModel``.

    Note:
        The model is rebuilt from the edge list only, so a variable that
        ends up with no edges in the learned structure is presumably not
        part of the returned model -- verify if isolated nodes matter.
    """
    bic = BicScore(train)
    searcher = HillClimbSearch(train, scoring_method=bic)
    learned_edges = searcher.estimate().edges()

    network = BayesianModel(learned_edges)
    network.fit(train, estimator=BayesianEstimator)
    return network
def Independence_BN(train):
    """Build a Bayesian network using constraint-based structure learning.

    ``ConstraintBasedEstimator`` is run on ``train`` with a significance
    level of 0.01; a fresh ``BayesianModel`` is then created from the
    resulting edges and its parameters are fitted on the same data with
    ``BayesianEstimator``.

    Args:
        train: Training data handed unchanged to the pgmpy estimators
            (presumably a pandas DataFrame of discrete columns -- confirm
            against the caller).

    Returns:
        The fitted ``BayesianModel``.

    Note:
        The model is rebuilt from the edge list only, so a variable that
        ends up with no edges in the estimated structure is presumably not
        part of the returned model -- verify if isolated nodes matter.
    """
    structure = ConstraintBasedEstimator(train).estimate(significance_level=0.01)

    network = BayesianModel(structure.edges())
    network.fit(train, estimator=BayesianEstimator)
    return network
def BN_view(model):
nx.draw(model,
with_labels=True,
node_size=1000,
font_weight='bold',
node_color='y',
pos={"Cabin": [1.5, 3], "Pclass": [1.5, 2.5], "Fare": [2, 2], "Sex": [1.5, 1], "Survived": [1.5, 1.5],
"Age": [1, 2]})
plt.text(1.5, 3, model.get_cpds("Cabin"), fontsize=5, color='b')
plt.text(1.5, 2.5, model.get_cpds("Pclass"), fontsize=5, color='b')
plt.text(2, 2, model.get_cpds("Fare"), fontsize=5, color='b')
# plt.text(3, 2, n_model.get_cpds("D"),