一、数据
现有如下数据:
要研究baidu、google分别与truth的关系
二、代码
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st
# Load the experiment table; the 'truth', 'baidu' and 'google' columns are
# pulled out as plain arrays for the regression and the plots further down.
data = pd.read_excel("exp.xlsx")
truth, baidu, google = (
    data[column].values for column in ('truth', 'baidu', 'google')
)

# Simple linear regression of truth on each engine's score. Iterating a
# linregress result yields slope, intercept, r-value, p-value and the
# standard error of the slope, in that order.
fit_baidu = st.linregress(baidu, truth)
fit_google = st.linregress(google, truth)
k_baidu, b_baidu, r_baidu, p_baidu, std_baidu = fit_baidu
k_google, b_google, r_google, p_google, std_google = fit_google

# Report the fitted lines as "truth = slope * score + intercept".
print("baidu: true bleu = ", k_baidu, "* baidu bleu + ", b_baidu)
print("google: true bleu = ", k_google, "* google bleu + ", b_google)
# Horizontal extent of the regression lines: the smallest and largest score
# observed across both engines. The bounds come from the data itself rather
# than from fixed seeds of 1 and 0, so the range is also correct when the
# scores are not confined to [0, 1] (e.g. BLEU reported on a 0-100 scale).
min_x = min(np.min(baidu), np.min(google))
max_x = max(np.max(baidu), np.max(google))

# Sample the x axis slightly beyond the data on both sides so the fitted
# lines extend a little past the outermost scatter points.
x = np.arange(min_x - 0.01, max_x + 0.01, 0.001)

# Evaluate each fitted line (slope * x + intercept) on the shared x grid.
y_baidu = k_baidu * x + b_baidu
y_google = k_google * x + b_google
# Which figure to draw:
#   1 -> both engines, 2 -> baidu only, 3 -> google only.
# Any other value draws nothing.
plot_type = 1

# One entry per engine to draw: (scores, fitted line, colour, legend label).
series = []
if plot_type in (1, 2):
    series.append((baidu, y_baidu, 'r', 'baidu'))
if plot_type in (1, 3):
    series.append((google, y_google, 'b', 'google'))

if series:
    # Draw all scatter clouds first and the regression lines afterwards, so
    # the artists are created in a fixed order: points, then lines.
    for scores, _, colour, name in series:
        plt.scatter(scores, truth, c=colour, label=name)
    for _, fitted, colour, name in series:
        plt.plot(x, fitted, lw=2, ls='-', color=colour,
                 label=name + ' regression')

    plt.xlabel("baidu / google bleu")
    plt.ylabel("true bleu")
    plt.legend(loc='upper left')
    plt.grid()
    # Equal scaling on both axes so the slopes are visually comparable.
    plt.gca().set_aspect(1)
    plt.show()
三、运行结果
plot_type = 1
plot_type = 2
plot_type = 3
baidu: $\text{true bleu} = 0.5195168773670095 \times \text{baidu bleu} + 0.024154946754922704$
google: $\text{true bleu} = 0.5338643844656664 \times \text{google bleu} + 0.024229148708655016$