% 读取Excel文件
filename = 'your_excel_file.xlsx';
data = readtable(filename);
% 初始化存储被移除的用户编号
removedUsers = [];
% 创建一个新的表格来存储清洗后的数据
cleanedDataTable = table();
% 获取所有用户编号
userIDs = unique(data.A);
% 遍历每个用户进行处理
for i = 1:length(userIDs)
% 选择当前用户的数据
userData = data(data.A == userIDs(i), :);
% 获取日期和96个数据点
powerData = userData{:, 7:end}; % 假设第7列开始为96个数据点
% 筛选步骤1:找到超过10%为零或缺失值的天数
invalidDays = sum(powerData == 0 | isnan(powerData), 2) > 0.1 * size(powerData, 2);
% 检查无效天数是否超过全年的三分之一
if sum(invalidDays) > size(powerData, 1) / 3
removedUsers = [removedUsers; userIDs(i)];
continue;
end
% 清洗规则:零值处理
cleanedData = powerData;
for j = 1:size(cleanedData, 1)
% 处理首尾的零值
if isnan(cleanedData(j, 1)) || cleanedData(j, 1) == 0
cleanedData(j, 1) = mean([cleanedData(j, 2), cleanedData(max(j-1, 1), end)], 'omitnan');
end
if isnan(cleanedData(j, end)) || cleanedData(j, end) == 0
cleanedData(j, end) = mean([cleanedData(j, end-1), cleanedData(min(j+1, size(cleanedData, 1)), 1)], 'omitnan');
end
% 处理中间的零值
for k = 2:size(cleanedData, 2)-1
if cleanedData(j, k) == 0 || isnan(cleanedData(j, k))
cleanedData(j, k) = mean([cleanedData(j, k-1), cleanedData(j, k+1)], 'omitnan');
end
end
end
% 将清洗后的数据添加到总表格中
cleanedUserData = userData;
cleanedUserData{:, 7:end} = cleanedData;
cleanedDataTable = [cleanedDataTable; cleanedUserData(~invalidDays, :)];
end
% 保存清洗后的数据到新的Excel文件
outputFilename = 'filtered_cleaned_data.xlsx';
writetable(cleanedDataTable, outputFilename);
% 输出被移除的用户列表
disp('Removed users:');
disp(removedUsers);
% 保存被移除的用户列表到文件
removedUsersFile = 'removed_users.txt';
fid = fopen(removedUsersFile, 'w');
fprintf(fid, 'Removed Users:\n');
fprintf(fid, '%d\n', removedUsers);
fclose(fid);
fprintf('Filtered and cleaned data saved to %s\n', outputFilename);
fprintf('Removed users list saved to %s\n', removedUsersFile);
用matlab筛选用电数据
于 2024-08-16 22:06:31 首次发布