要在函数内部将 saved_feature_combinations 转换为元组形式,
可以在确认 saved_feature_combinations 非空之后,把其中每个字符串元素按逗号拆分并转换为元组。
只需在现有逻辑的基础上添加这一转换步骤即可。
def process_and_save_combinations(self, saved_feature_combinations, saved_normalized_data, all_feature_combinations, feature_combinations_0, config, start_date, end_date):
    """
    Run feature engineering, merge with previously saved results, and save.

    Flow:
      1. Normalize ``saved_feature_combinations`` so every entry is a tuple
         (comma-separated strings are split; lists are converted).
      2. If saved combinations exist and ``saved_normalized_data.empty`` is
         true, reconcile them through ``self.process_combinations`` and then
         run feature engineering on ``feature_combinations_0``; the two
         results are concatenated and de-duplicated, keeping the first
         occurrence of each combination and its matching data entry.
      3. Otherwise only feature engineering is run and its result is used
         as-is.
      4. The final result is passed to ``self.save_results``.

    The original author's notes say the reconciliation step drops saved
    combinations that are no longer requested and keeps unchanged ones;
    that logic lives in ``process_combinations`` and is not visible here.

    Args:
        saved_feature_combinations: Previously saved feature combinations.
        saved_normalized_data: Previously saved normalized data; must expose
            an ``.empty`` attribute (presumably a pandas DataFrame -- TODO
            confirm against the caller).
        all_feature_combinations: All feature combinations, forwarded to
            ``process_combinations``.
        feature_combinations_0: Initial feature combinations to engineer.
        config, start_date, end_date: Forwarded to the engineering step only
            when ``self.processor`` is an ``NZDataProcessor``.

    Returns:
        ``(unique_feature_combinations, unique_normalized_data)``. On any
        exception the error and traceback are printed and ``([], [])`` is
        returned.
    """

    def run_engineering():
        # NZDataProcessor needs the extra config/date arguments; other
        # processors take the initial combinations only.
        if isinstance(self.processor, NZDataProcessor):
            return self.perform_engineering_with_params(
                feature_combinations_0, config, start_date, end_date
            )
        return self.perform_engineering_without_params(feature_combinations_0)

    try:
        # Normalize saved combinations to tuples so they are hashable and
        # compare equal to freshly generated tuple combinations.
        if saved_feature_combinations:
            normalized_saved = []
            for feature in saved_feature_combinations:
                if isinstance(feature, str):
                    feature = tuple(feature.split(','))
                elif isinstance(feature, list):
                    # Lists would be unhashable during de-duplication below.
                    feature = tuple(feature)
                normalized_saved.append(feature)
            saved_feature_combinations = normalized_saved

        # NOTE(review): this branch is taken when the saved normalized data
        # IS empty, while the message below says saved data is present.
        # The condition may be inverted -- confirm the intent before changing.
        if saved_feature_combinations and saved_normalized_data.empty:
            print("Saved data is present, processing combinations...")
            feature_combinations, normalized_data = self.process_combinations(
                saved_feature_combinations, saved_normalized_data, all_feature_combinations
            )
            new_feature_combinations, new_normalized_data = run_engineering()
            log_info_message(f"---读取成功,执行完特征工程得到的值:{feature_combinations}")

            combined_feature_combinations = feature_combinations + new_feature_combinations
            combined_normalized_data = normalized_data + new_normalized_data

            # Keep the first occurrence of each combination. A single pass
            # replaces set() + list.index(): the output order is now
            # deterministic and the lookup is O(n) instead of O(n^2).
            seen = set()
            first_positions = []
            for position, combination in enumerate(combined_feature_combinations):
                key = tuple(combination) if isinstance(combination, list) else combination
                if key not in seen:
                    seen.add(key)
                    first_positions.append(position)

            unique_feature_combinations = [
                combined_feature_combinations[position] for position in first_positions
            ]
            unique_normalized_data = [
                combined_normalized_data[position] for position in first_positions
            ]
        else:
            # Nothing usable was saved: the engineering output is the result.
            feature_combinations, normalized_data = run_engineering()
            print(f"---读取不成功,执行完特征工程得到的值:{feature_combinations}")
            unique_feature_combinations = feature_combinations
            unique_normalized_data = normalized_data

        self.save_results(unique_feature_combinations, unique_normalized_data)
    except Exception as e:
        # Best-effort boundary: report the failure and hand back empty results.
        print(f"处理和保存特征组合时出错: {e}")
        import traceback
        traceback.print_exc()  # Print the full stack trace.
        return [], []  # Empty results signal failure to the caller.
    return unique_feature_combinations, unique_normalized_data