# Variable-frequency drive (inverter) fault data, generated with MATLAB Simulink.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder, MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split, cross_validate, cross_val_score, GridSearchCV, KFold
from sklearn.metrics import accuracy_score, roc_curve, roc_auc_score, precision_score
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.compose import make_column_transformer
# Read the workbook; the first row supplies the column names.
data = pd.read_excel("All Data.xlsx", header=0)
# Remove the 'Unnamed: 0' column (presumably a leftover row index from the
# export -- confirm against the spreadsheet).
new_df = data.drop(columns='Unnamed: 0')
new_df.head(10)
new_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40040 entries, 0 to 40039
Data columns (total 10 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Tn (Rated Torque) N*m 40040 non-null float64
1 k (constant of proportionality) 40040 non-null float64
2 time (sec) 40040 non-null float64
3 Ia (Amp) 40040 non-null float64
4 Ib (Amp) 40040 non-null float64
5 Ic (Amp) 40037 non-null float64
6 Vab (V) 40040 non-null float64
7 Torque (N*m) 40040 non-null float64
8 Speed (rad/s) 40040 non-null float64
9 Category 40040 non-null object
dtypes: float64(9), object(1)
memory usage: 3.1+ MB
# Summary statistics for the numeric columns.
new_df.describe()
# Number of missing values in each column.
new_df.isna().sum()
Tn (Rated Torque) N*m 0
k (constant of proportionality) 0
time (sec) 0
Ia (Amp) 0
Ib (Amp) 0
Ic (Amp) 3
Vab (V) 0
Torque (N*m) 0
Speed (rad/s) 0
Category 0
dtype: int64
# Samples per fault category. The classes are uneven (the largest is roughly
# 4x the smallest), but the analysis treats this as balanced enough and
# applies no resampling or re-weighting.
new_df['Category'].value_counts()
Category
NOM 13013
PTPF 7007
PTGF 7007
OVF 5005
OLF 5005
UVF 3003
Name: count, dtype: int64
# Bar chart of the number of samples in each category.
sns.catplot(data=new_df, x='Category', kind='count', height=4, aspect=2)

# One box plot per feature, grouped by category.
box_plot_features = [
    'Tn (Rated Torque) N*m',
    'k (constant of proportionality)',
    'time (sec)',
    'Ia (Amp)',
    'Ib (Amp)',
    'Ic (Amp)',
    'Vab (V)',
    'Torque (N*m)',
    'Speed (rad/s)',
]
for feature in box_plot_features:
    sns.catplot(data=new_df, x='Category', y=feature, kind='box', height=6, aspect=3)

# One line plot per signal against the time column.
time_plot_signals = [
    "Torque (N*m)",
    "Vab (V)",
    "Speed (rad/s)",
    "k (constant of proportionality)",
    "Ia (Amp)",
    "Ib (Amp)",
    "Ic (Amp)",
]
for signal in time_plot_signals:
    sns.relplot(data=new_df, x="time (sec)", y=signal, kind="line", height=5, aspect=3)
# Replace the string category labels with integer codes. LabelEncoder numbers
# the classes in sorted order of their names; the fitted encoder is kept in
# `label_category` so codes can be mapped back to names later.
label_category = LabelEncoder()
label_category.fit(new_df['Category'])
new_df['Category'] = label_category.transform(new_df['Category'])
new_df.head()
new_df['Category'].unique()
array([0, 4, 3, 5, 2, 1])
# Samples per encoded category (the call was missing its closing parenthesis).
new_df.Category.value_counts()
Category
0 13013
4 7007
3 7007
2 5005
1 5005
5 3003
Name: count, dtype: int64
# Heat map of the pairwise correlations between every column, including the
# integer-encoded Category column.
plt.figure(figsize=(8, 6))
correlations = new_df.corr()
dataplot = sns.heatmap(correlations, cmap="YlGnBu", annot=True)
# Exclude the time and torque columns from the model inputs.
new_df = new_df.drop(columns=['time (sec)', 'Torque (N*m)'])
new_df
# Feature matrix: every remaining column except the target.
x = new_df.drop(columns=['Category'])
x.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40040 entries, 0 to 40039
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Tn (Rated Torque) N*m 40040 non-null float64
1 k (constant of proportionality) 40040 non-null float64
2 Ia (Amp) 40040 non-null float64
3 Ib (Amp) 40040 non-null float64
4 Ic (Amp) 40037 non-null float64
5 Vab (V) 40040 non-null float64
6 Speed (rad/s) 40040 non-null float64
dtypes: float64(7)
memory usage: 2.1 MB
# Target vector: the integer-encoded category of each row.
y = new_df.Category
y.info()
<class 'pandas.core.series.Series'>
RangeIndex: 40040 entries, 0 to 40039
Series name: Category
Non-Null Count Dtype
-------------- -----
40040 non-null int32
dtypes: int32(1)
memory usage: 156.5 KB
# Preprocessing for the numeric features: fill missing values with the column
# mean, then rescale each column with min-max scaling.
numeric_transformer = make_pipeline(
    SimpleImputer(strategy="mean"),
    MinMaxScaler(),
)
# Split into training and testing subsets BEFORE fitting the transformer, so
# the imputation means and the min/max ranges are learned from training rows
# only and nothing from the test rows leaks into preprocessing. The split
# depends only on the row count and random_state, so it selects the same rows
# as splitting the transformed frame would.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    x, y, random_state=42, test_size=0.2
)
numeric_transformer.fit(x_train_raw)
Pipeline(steps=[('simpleimputer', SimpleImputer()),
('minmaxscaler', MinMaxScaler())])
# Apply the training-fitted transformer to every row; test-row values may fall
# slightly outside [0, 1] because the ranges come from the training rows.
X = numeric_transformer.transform(x)
X = pd.DataFrame(X, columns=x.columns, index=x.index)
X.head()
# Pick the transformed rows that belong to each side of the split.
X_train = X.loc[x_train_raw.index]
X_test = X.loc[x_test_raw.index]
# Exhaustive search over KNN hyper-parameters, scored by accuracy with
# 5-fold cross-validation on the training subset.
cv = KFold(n_splits=5)
params = [
    dict(
        n_neighbors=np.arange(1, 50, 2),  # odd neighbour counts 1..49
        weights=['uniform', 'distance'],
        p=[1, 2],  # Minkowski power: 1 = Manhattan, 2 = Euclidean
        n_jobs=[-1],
    )
]
knn_clf = KNeighborsClassifier()
clf = GridSearchCV(estimator=knn_clf, param_grid=params, scoring='accuracy', cv=cv)
clf.fit(X_train, y_train)
GridSearchCV(cv=KFold(n_splits=5, random_state=None, shuffle=False),
estimator=KNeighborsClassifier(),
param_grid=[{'n_jobs': [-1],
'n_neighbors': array([ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33,
35, 37, 39, 41, 43, 45, 47, 49]),
'p': [1, 2], 'weights': ['uniform', 'distance']}],
scoring='accuracy')
# Parameter combination selected by the accuracy-scored grid search above.
clf.best_params_
{'n_jobs': -1, 'n_neighbors': 27, 'p': 1, 'weights': 'uniform'}
# Build the final classifier directly from the grid-search winner rather than
# hand-copying values: the previous hard-coded n_neighbors=23 did not match
# the optimum reported by `clf.best_params_`.
knn_clf = KNeighborsClassifier(**clf.best_params_)
# Re-estimate accuracy with 10-fold cross-validation on the training subset,
# keeping the training-fold scores so over-fitting is visible.
cv = KFold(n_splits=10)
scores_knn = pd.DataFrame(
    cross_validate(
        knn_clf,
        X_train,
        y_train,
        scoring='accuracy',
        cv=cv,
        n_jobs=-1,
        error_score='raise',
        return_train_score=True,
    )
)
scores_knn.mean()
fit_time 0.062957
score_time 0.256770
test_score 0.933348
train_score 0.944663
dtype: float64
# Train on the whole training subset, predict the held-out test subset, and
# report the test accuracy.
knn_pred = knn_clf.fit(X_train, y_train).predict(X_test)
accuracy_score(y_true=y_test, y_pred=knn_pred)
0.9313186813186813
# Confusion matrix on the test subset, normalised over all samples.
# LabelEncoder numbers the classes in sorted order of their names, so the tick
# labels are taken from the fitted encoder; a hand-written list in a different
# order would attach the wrong names to the rows and columns.
class_names = label_category.classes_
class_codes = label_category.transform(class_names)
# Passing `labels` pins the row/column order and keeps a row for every class,
# even one that happens to be absent from the test subset.
cm = confusion_matrix(y_test, knn_pred, labels=class_codes, normalize='all')
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
sns.set_style("white")
plt.rc('font', size=10)
disp.plot()
plt.show()