1、生成回归模型的数据
# Generate sample data for a regression model.
#   n_targets: dimension of the y output vector per sample (scalar by default).
#   noise:     standard deviation of the Gaussian noise added to the output.
#   coef=True: also return the underlying (noise-free) coefficients.
X, y, coef = datasets.make_regression(
    n_samples=100,
    n_features=1,
    n_targets=1,
    noise=10,
    coef=True,
)
print("X的数据为:\n", X)
print("y的数据为:\n", y)
print("coef的数据为:\n", coef)
# Scatter the noisy samples and overlay the true regression line y = X * coef.
plt.scatter(X, y, color='black')
plt.plot(X, X * coef, color='blue', linewidth=3)
plt.show()
2、分类模型随机数据
from sklearn.datasets import make_classification

# Draw 400 samples with 2 features (both informative, none redundant),
# 3 output classes, and a single cluster per class.
features, labels = make_classification(
    n_samples=400,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=3,
    n_clusters_per_class=1,
)
print(features)
# Visualise the samples, coloured by their class label.
plt.scatter(features[:, 0], features[:, 1], marker='o', c=labels)
plt.show()
3、聚类模型随机数据
from sklearn.datasets import make_blobs

# 1000 samples with 2 features each, drawn from three clusters centred at
# (-1, -1), (1, 1) and (2, 2) with standard deviations 0.4, 0.5 and 0.2.
cluster_centers = [[-1, -1], [1, 1], [2, 2]]
cluster_spreads = [0.4, 0.5, 0.2]
X, y = make_blobs(
    n_samples=1000,
    n_features=2,
    centers=cluster_centers,
    cluster_std=cluster_spreads,
)
# Plot the points, coloured by the cluster each sample belongs to.
plt.scatter(X[:, 0], X[:, 1], marker='o', c=y)
plt.show()
4、分组正态分布混合数据
from sklearn.datasets import make_gaussian_quantiles

# 1000 samples from a single 2-D Gaussian (mean (1, 2), isotropic
# covariance 2), partitioned into 3 classes by quantiles of the density.
quantile_X, quantile_y = make_gaussian_quantiles(
    mean=[1, 2],
    cov=2,
    n_samples=1000,
    n_features=2,
    n_classes=3,
)
# Scatter the samples, coloured by their quantile-derived class.
plt.scatter(quantile_X[:, 0], quantile_X[:, 1], marker='o', c=quantile_y)
plt.show()
引用:随机数据生成