import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons,make_circles,make_classification
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
from sklearn import svm
import matplotlib as mpl
from sklearn import tree
from sklearn import neighbors
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import BaggingClassifier
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.metrics import accuracy_score,roc_curve,auc
from sklearn.ensemble import VotingClassifier
# Fix the global NumPy RNG seed so the random noise and sampling below are repeatable.
np.random.seed(555)
"""
导入数据
"""
# Load the source table from the author's local (Windows) path.
df = pd.read_csv('C:\\Users\\asus\\Desktop\\数据\\zhiyintu.csv')

# Label column ('guzhang'), one label per CSV row.
y1 = df['guzhang']

# Columns 4-5: described by the original author as "ratio coefficients".
X2 = df.iloc[:, 4:6]
# Columns 8-10: described by the original author as "unit vectors".
X3 = df.iloc[:, 8:11]
# Disabled alternative feature selection kept from the original (columns 6-7):
# X4 = df.iloc[:, 6:8]

# Plain ndarray copy of the X2 feature columns.
X5 = np.array(X2)
"""
构造训练数据
"""
# Build the training set: every row of X2 / y1 is expanded into 30 noisy copies.
XD = []        # per-row feature arrays, each of shape (30, n_features)
yD = []        # per-row label arrays, each of shape (30,)
datasets = []  # per-row (features, labels) pairs
for i in range(len(y1)):
    # Slice (instead of indexing) so the row stays 2-D and the label stays 1-D;
    # .iloc makes the positional intent explicit.
    XX = X2.iloc[i:i + 1]
    yy = y1.iloc[i:i + 1]
    base = np.tile(np.array(XX), (30, 1))
    # Gaussian noise scaled to 10% of each feature's absolute value.
    XXX = base + abs(base) * 0.1 * np.random.randn(*base.shape)
    yyy = np.tile(np.array(yy), (30))
    XD.append(XXX)
    yD.append(yyy)
    data = (XXX, yyy)
    datasets.append(data)
# Stack the per-row groups into a single feature matrix and label vector.
# Concatenating the whole list (rather than naming indices 0..12 by hand)
# keeps this correct for any number of CSV rows.
XXXX = np.concatenate(XD, axis=0)
yyyy = np.concatenate(yD, axis=0)
"""
构造测试数据
"""
# Build the test set with the same procedure as the training set: every row
# of X2 / y1 is expanded into 30 noisy copies, using fresh random draws.
XD1 = []        # per-row feature arrays, each of shape (30, n_features)
yD1 = []        # per-row label arrays, each of shape (30,)
datasets1 = []  # per-row (features, labels) pairs
for i in range(len(y1)):
    # Slice (instead of indexing) so the row stays 2-D and the label stays 1-D;
    # .iloc makes the positional intent explicit.
    XX = X2.iloc[i:i + 1]
    yy = y1.iloc[i:i + 1]
    base = np.tile(np.array(XX), (30, 1))
    # Gaussian noise scaled to 10% of each feature's absolute value.
    XXX = base + abs(base) * 0.1 * np.random.randn(*base.shape)
    yyy = np.tile(np.array(yy), (30))
    XD1.append(XXX)
    yD1.append(yyy)
    data1 = (XXX, yyy)
    datasets1.append(data1)
# Stack the per-row groups into a single feature matrix and label vector.
# Concatenating the whole list (rather than naming indices 0..12 by hand)
# keeps this correct for any number of CSV rows.
XXXX1 = np.concatenate(XD1, axis=0)
yyyy1 = np.concatenate(yD1, axis=0)
"""
构造随机采集数据点的函数
"""
def random_sampling(dataset, labelset, m):
    """Draw ``m`` samples, with replacement, from paired data and labels.

    Args:
        dataset: Indexable sequence of samples (list, ndarray, ...).
        labelset: Indexable sequence of labels aligned with ``dataset``
            by position.
        m: Number of draws. A value <= 0 yields two empty lists.

    Returns:
        A ``(data, label)`` tuple of two lists of length ``m``, where
        ``data[k]`` and ``label[k]`` were taken from the same position.

    Raises:
        ValueError: If ``m`` is positive and ``dataset`` is empty.
    """
    size = len(dataset)
    if m > 0 and size == 0:
        raise ValueError("cannot sample from an empty dataset")
    # np.random.randint excludes its upper bound, so the bound must be
    # ``size`` (not ``size - 1``) for the last element to be reachable.
    picks = [np.random.randint(0, size) for _ in range(m)]
    data = [dataset[a] for a in picks]
    label = [labelset[a] for a in picks]
    return data, label
"""
构造产生子集个数的函数
"""
def shujuziji(n, m):
    """Build ``n`` random subsets of the test data, ``m`` samples each.

    Each subset is produced by one call to ``random_sampling`` on the
    module-level ``XXXX1`` (features) and ``yyyy1`` (labels).

    Args:
        n: Number of subsets to generate. A value <= 0 yields an empty list.
        m: Number of samples drawn for each subset.

    Returns:
        A list of ``n`` items, each being whatever ``random_sampling``
        returns for one draw of ``m`` samples.
    """
    # Collect every subset. Re-creating the accumulator inside the loop
    # would keep only the final subset and leave nothing to return for n == 0.
    return [random_sampling(XXXX1, yyyy1, m) for _ in range(n)]
# Quick manual check: show the per-row test groups, then request
# 3 subsets of 30 samples each and show the result.
print(datasets1)
subsets = shujuziji(3, 30)
print(subsets)
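# Purpose of this script: generate any number of subsets, each of any requested
# size, from the original dataset.
# (Page footer left over from the source article: "latest recommended article
# published 2022-02-21 07:00:00".)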