# 1. Data preprocessing (数据预处理)
import pandas as pd
import glob
import os
# Directory that holds the raw CSV exports (placeholder value; edit before running).
path = 'path_to_your_csv_files/'

# glob returns matches in arbitrary, OS-dependent order. Sorting keeps the
# concatenated row order stable from run to run, so anything seeded later
# (for example a random train/test split) stays reproducible.
all_files = sorted(glob.glob(os.path.join(path, "*.csv")))
if not all_files:
    # pd.concat([]) would fail with an opaque "No objects to concatenate".
    raise FileNotFoundError(f"No CSV files found in {path!r}")

# Read every CSV and stack them into a single frame with a fresh 0..n-1 index.
df_list = [pd.read_csv(file) for file in all_files]
data = pd.concat(df_list, ignore_index=True)

# Parse the timestamp column ('保存时间') into datetime64 values.
data['保存时间'] = pd.to_datetime(data['保存时间'])

# Discard every row that has a missing value in any column.
data = data.dropna()

# Show the resulting column dtypes as a quick sanity check.
print(data.dtypes)
# 2. Exploratory data analysis, EDA (数据探索性分析)
import plotly.io as pio
import plotly.graph_objs as go
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
# Matplotlib font setup: use SimHei so the Chinese labels used below render,
# and draw minus signs as ASCII hyphens (commonly needed alongside CJK fonts).
plt.rcParams['font.family'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Summary statistics of the loaded data.
print(data.describe())

# Columns to plot against the timestamp column.
variables_to_analyze = [
    '主轴电机实时转速',
    '主轴电机实时电流',
    '腔体压力',
    '喂料电机实时转速',
    '喂料电机实时电流',
    '循环泵口压力',
    '出料压力',
]

# One interactive line trace per variable, all sharing the time axis.
fig = go.Figure()
for variable in variables_to_analyze:
    fig.add_trace(
        go.Scatter(
            x=data['保存时间'],
            y=data[variable],
            mode='lines',
            name=variable,
        )
    )
fig.update_layout(
    title='设备参数在一个月内的曲线变化图',
    xaxis_title='时间',
    yaxis_title='参数值',
    hovermode='x unified',
)
pio.show(fig)

# Pairwise correlation over numeric columns only. The frame also contains the
# datetime column '保存时间'; pandas >= 2.0 no longer drops non-numeric columns
# silently in DataFrame.corr() and raises instead, so select them explicitly.
corr_matrix = data.select_dtypes(include='number').corr()
print(corr_matrix)
# 3. Data visualization (数据可视化)
import seaborn as sns
# Annotated heatmap of the correlation matrix computed earlier
# (cell values are printed with two decimals).
plt.figure(figsize=(12, 10))
heatmap_ax = sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f')
heatmap_ax.set_title('参数相关性热力图')
plt.show()

# Scatter plot of one column against another, drawn on a fresh figure.
speed_col = '主轴电机实时转速'
current_col = '主轴电机实时电流'
plt.figure(figsize=(8, 6))
scatter_ax = sns.scatterplot(data=data, x=speed_col, y=current_col)
scatter_ax.set_xlabel(speed_col)
scatter_ax.set_ylabel(current_col)
scatter_ax.set_title('主轴电机实时转速与主轴电机实时电流的关系')
plt.show()
# 4. Feature engineering (特征工程)
# Derived feature: '主轴电机实时转速' divided by '主轴电机实时电流'.
# A current of exactly 0 would yield +/-inf (or NaN for 0/0), and
# LinearRegression.fit rejects non-finite input. Mask zeros to NaN before
# dividing so the ratio is simply undefined (NaN) on those rows.
spindle_current = data['主轴电机实时电流'].replace(0, float('nan'))
data['转速电流比'] = data['主轴电机实时转速'] / spindle_current

# Model inputs and the column to predict.
features = ['主轴电机设定转速', '主轴电机实时转速', '主轴电机实时电流', '转速电流比', '腔体压力']
target = '出料压力'

# Keep only rows where every model column is defined, so X and y stay aligned
# and finite. `data` itself is left with all of its rows.
model_rows = data.dropna(subset=features + [target])
X = model_rows[features]
y = model_rows[target]
# 5. Modelling and prediction (建模与预测)
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable for a given row order.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit an ordinary least-squares model on the training rows
# (fit() returns the estimator itself) and predict the held-out rows.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Score the predictions against the held-out targets and report both metrics.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
for label, value in (('Mean Squared Error', mse), ('R^2 Score', r2)):
    print(f'{label}: {value}')