1. Beta distribution
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import beta

if __name__ == '__main__':
    # Beta(0.5, 0.5): the U-shaped arcsine case of the Beta distribution on (0, 1)
    a = 0.5
    b = 0.5
    x = np.arange(0.01, 1, 0.01)
    y = beta.pdf(x, a, b)
    plt.plot(x, y)
    plt.show()
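The Beta pdf is f(x; a, b) = x^(a-1) (1-x)^(b-1) / B(a, b) on (0, 1), and its shape depends strongly on (a, b). A minimal sketch (using the same scipy.stats.beta as above, with parameter pairs chosen just for illustration) overlaying a few cases:

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import beta

x = np.linspace(0.01, 0.99, 99)
for a, b in [(0.5, 0.5), (2, 2), (2, 5), (5, 1)]:
    # (0.5, 0.5) is U-shaped, (2, 2) is bell-shaped, (2, 5) is right-skewed, (5, 1) is increasing
    plt.plot(x, beta.pdf(x, a, b), label=f'a={a}, b={b}')
plt.legend()
plt.show()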
2. Exponential distribution
import numpy as np
import matplotlib.pyplot as plt

if __name__ == '__main__':
    lambd = 0.5
    x = np.arange(0.1, 15, 0.1) + 0.001
    # Alternative check on the pdf: log f(x) = log(lambd) - lambd * x is linear in x
    # y = np.exp(-lambd * x) * lambd + 0.001
    # Y = np.log(np.array(y))
    # plt.figure(figsize=(10, 5))
    # plt.scatter(x, Y)
    # plt.plot(x, np.log(lambd) - lambd * x, '--')
    # plt.xlim([0, 15])
    # CDF of the exponential distribution: F(x) = 1 - exp(-lambd * x); plot -log F(x) against x
    y = 1 - np.exp(-lambd * x)
    Y = -np.log(y)
    plt.scatter(x, Y)
    plt.plot(x, -np.log(1 - np.exp(-lambd * x)), '--')
    plt.show()
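A related check on the same distribution: the survival function is 1 - F(x) = exp(-lambd * x), so -log(1 - F(x)) = lambd * x is exactly a straight line through the origin with slope lambd. A minimal sketch, assuming the same lambd = 0.5 as above:

import numpy as np
import matplotlib.pyplot as plt

lambd = 0.5
x = np.arange(0.1, 15, 0.1)
surv = np.exp(-lambd * x)                        # survival function 1 - F(x)
plt.scatter(x, -np.log(surv), s=10)
plt.plot(x, lambd * x, '--', label='lambd * x')  # straight line with slope lambd
plt.legend()
plt.show()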
3. Normal distribution
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

if __name__ == '__main__':
    mu = 0
    sigma = 1
    x = np.arange(-5, 5, 0.1)
    print(len(x))
    # Normal pdf computed by hand: f(x) = exp(-(x - mu)^2 / (2 sigma^2)) / (sigma * sqrt(2 pi))
    y = np.exp(-(x - mu) ** 2 / (2 * sigma ** 2)) / (sigma * np.sqrt(2 * np.pi)) + 0.000001
    plt.scatter(x, y)
    # Log-pdf check: log f(x) = -log(sigma * sqrt(2 pi)) - (x - mu)^2 / (2 sigma^2), a downward parabola
    # Y = np.log(y)
    # plt.scatter(x, Y)
    # plt.plot(x, -np.log(sigma * np.sqrt(2 * np.pi)) - (x - mu) ** 2 / (2 * sigma ** 2), '-')
    # Equivalent via scipy: y = norm.pdf(x, mu, sigma)
    plt.show()
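The commented-out norm.pdf line suggests using the built-in as a cross-check. A minimal sketch (ignoring the small +0.000001 offset used above) comparing the hand-rolled pdf with scipy.stats.norm.pdf:

import numpy as np
from scipy.stats import norm

mu, sigma = 0, 1
x = np.arange(-5, 5, 0.1)
manual = np.exp(-(x - mu) ** 2 / (2 * sigma ** 2)) / (sigma * np.sqrt(2 * np.pi))
print(np.allclose(manual, norm.pdf(x, mu, sigma)))  # True: the two agree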
4. Poisson distribution
import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt

# num_years[k]: observed number of years in which exactly k events occurred (k = 0..7)
num_years = [4, 10, 7, 5, 4, 0, 0, 1]
# Maximum-likelihood estimate of the Poisson rate: the sample mean of the counts
lmbda = sum(x * y for x, y in zip(range(8), num_years)) / sum(num_years)
print(lmbda)
rv = st.poisson(lmbda)
x = range(8)
plt.bar(np.array(x), num_years)
# Expected number of years with k events under the fitted Poisson model
plt.plot(x, sum(num_years) * rv.pmf(x), c='r', label='Poisson')
plt.xlim([-1, 8])
plt.ylim([0, 11])
plt.legend(loc='best')
plt.show()
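As a sanity check on the fitted rate (the data above give lmbda = 62 / 31 = 2.0), one can simulate many Poisson draws at that rate and compare empirical frequencies with the pmf. A minimal sketch:

import numpy as np
from scipy.stats import poisson

lmbda = 2.0  # rate estimated from num_years above
samples = np.random.poisson(lmbda, size=100000)
for k in range(5):
    # empirical frequency of exactly k events vs. the theoretical pmf value
    print(k, np.mean(samples == k), poisson.pmf(k, lmbda))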
5. Power-law distribution
import matplotlib.pyplot as plt
import numpy as np
from sklearn import linear_model
from scipy.stats import norm

def DataGenerate():
    # Generate noisy power-law data: Y = 10.8 * X^(-0.3) + Gaussian noise
    X = np.arange(10, 1010, 10)
    noise = norm.rvs(0, size=100, scale=0.2)
    Y = []
    for i in range(len(X)):
        Y.append(10.8 * pow(X[i], -0.3) + noise[i])
    Y = np.array(Y)
    # plot raw data
    # plt.title("Raw data")
    # plt.scatter(X, Y, color='b')
    # plt.show()
    # Take log10 of both axes: a power law becomes a straight line in log-log space
    X0 = np.log10(X)
    Y0 = np.log10(Y)
    return X, Y, X0, Y0
def DataFitAndVisualization(X, Y):
    X_parameter = []
    Y_parameter = []
    for single_square_feet, single_price_value in zip(X, Y):
        X_parameter.append([float(single_square_feet)])
        Y_parameter.append(float(single_price_value))
    regr = linear_model.LinearRegression()  # simple one-variable linear regression
    # Fit the parameters and draw the fitted line
    print(X_parameter, Y_parameter)
    regr.fit(X_parameter, Y_parameter)
    C = 10 ** regr.intercept_  # intercept in log-log space gives the constant C
    print(C)
    r = regr.coef_             # slope in log-log space is the power-law exponent r
    print('Coefficients: \n', regr.coef_)
    print("Intercept:\n", regr.intercept_)
    # The mean squared error of the fit
    print("Mean squared error: %.8f"
          % np.mean((regr.predict(X_parameter) - Y_parameter) ** 2))
    plt.title("Log Data")
    plt.scatter(X_parameter, Y_parameter, color='black')
    plt.plot(X_parameter, regr.predict(X_parameter), color='blue', linewidth=3)
    # plt.xticks(())
    # plt.yticks(())
    plt.show()
    return C, r
if __name__ == "__main__":
    X, Y, X0, Y0 = DataGenerate()
    # Fit the straight line in log-log space, then map back to Y ≈ C * X^r
    C, r = DataFitAndVisualization(X0, Y0)
    print(C, X.shape, r.shape)  # e.g. 10.6884732354 (100,) (1,)
    plt.scatter(X, Y, color='blue')
    plt.plot(X, C * X ** r, color='red', linewidth=1.0, linestyle='-')
    plt.show()
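The same log-log fit can be cross-checked with a one-line least-squares fit via np.polyfit. A minimal sketch, using noise-free stand-in data (X0, Y0 here are illustrative, not the arrays returned by DataGenerate):

import numpy as np

# In log-log space the slope is the power-law exponent r and the intercept is log10(C)
X = np.arange(10, 1010, 10)
X0 = np.log10(X)
Y0 = np.log10(10.8 * X ** -0.3)
slope, intercept = np.polyfit(X0, Y0, 1)
print(10 ** intercept, slope)  # recovers C = 10.8 and r = -0.3 up to floating-point error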