《计算传播基础》读书笔记:第五章 统计思维(一)

第五章 统计思维

KL Divergence

from sklearn import manifold, datasets
from scipy.stats import entropy, pearsonr
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

n_points = 1000
X, color = datasets.make_s_curve(n_points, random_state=0)
x, col = datasets.make_swiss_roll(n_points, noise=0.05)

X1, X2, X3 = X.T

# Create figure
fig = plt.figure(figsize=(8, 8))

# Add 3d scatter plot
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.Spectral)
ax.view_init(4, -72)
plt.show()

# Create figure
fig = plt.figure(figsize=(8, 8))

# Add 3d scatter plot
ax = fig.add_subplot(111, projection='3d')
ax.scatter(x[:, 0], x[:, 1], x[:, 2], c=col, cmap=plt.cm.Spectral)
ax.view_init(4, -72)
plt.show()

plt.plot(X1, X3, 'ro')
plt.show()

print(normalized_mutual_info_score(X1, X3), pearsonr(X1, X3))
1.0 (-0.08697318258913389, 0.005921261651160034)
def processNegVals(x):
    x = np.array(x)
    minx = np.min(x)
    if minx < 0:
        x = x + abs(minx)
    x = x + 0.000001
    px = x/np.sum(x)
    return px


print(entropy(processNegVals(X1), processNegVals(X3)))
0.9820849486320271
def KL(P, Q):
    epsilon = 0.00001
    P = processNegVals(P)
    Q = processNegVals(Q)
    divergence = np.sum(P*np.log(P/Q))
    return divergence


print(KL(X1, X3))
0.9820849486320271
plt.plot(x[:, 0], x[:, 2], 'ro')
plt.show()

print(KL(x[:, 0], x[:, 2]))
0.5714407581920489
print(entropy(processNegVals(x[:, 0]), processNegVals(x[:, 2])))
0.5730153281109394
import scipy

vec = scipy.special.rel_entr(processNegVals(x[:, 0]), processNegVals(x[:, 2]))
kl_div = np.sum(vec)
print(kl_div)
0.5184330437935203
n_samples = 1500
noisy_circles = datasets.make_circles(n_samples=n_samples,
                                      factor=.5,
                                      noise=.05)
c1, c2 = noisy_circles[0][:, 0], noisy_circles[0][:, 1]
plt.plot(c1, c2, 'ro')
plt.show()

print(KL(c1, c2), entropy(processNegVals(c1), processNegVals(c2)), pearsonr(c1, c2))
0.3763193691703814 0.3763193691703816 (3.7145624444947286e-05, 0.9988530856054932)
noisy_moons = datasets.make_moons(n_samples, noise=.05)
m1, m2 = noisy_moons[0][:, 0], noisy_moons[0][:, 1]
plt.plot(m1, m2, 'ro')
plt.show()

n_samples = 1000
de_linearize = lambda X: np.cos(1.5 * np.pi * X) + np.cos(5 * np.pi * X)
X = np.sort(np.random.rand(n_samples)) * 2
y = de_linearize(X) + np.random.randn(n_samples) * 0.1
plt.plot(X, y)
plt.show()

print(KL(X, y), entropy(processNegVals(X), processNegVals(y)), pearsonr(X, y))
0.3988967088040082 0.3988967088040082 (-0.050574762504227044, 0.10996799031076543)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值