# 1. Combine multiple variables against a single target to perform combination optimization:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
%matplotlib inline
# Build the result dictionary (one empty list per output column) and
# wrap it in a DataFrame.
result = {
    column: []
    for column in ("gender", "age", "job", "job_time", "size", "mean")
}
# NOTE(review): "salsry" looks like a misspelling of "salary" — confirm
# against the real column name before changing it.
metricx = "salsry"
data = pd.DataFrame(result)
# Cross-analyse the metric over a combination of dimensions.
def group_combine(data, dimension, metric):
    """Aggregate ``metric`` by the given dimension columns.

    Args:
        data: DataFrame holding the dimension columns and the metric column.
        dimension: Iterable of column names to group by (must be non-empty).
        metric: Name of the column to aggregate.

    Returns:
        A list of dicts, one per group, holding each dimension value plus
        ``"size"`` (number of rows in the group) and ``"mean"`` (average of
        ``metric`` within the group).
    """
    grouped = data.groupby(list(dimension))[metric]
    # reset_index() turns the group keys back into ordinary columns so they
    # appear in every exported record next to the aggregates.
    data_agg = grouped.agg(["size", "mean"]).reset_index()
    return data_agg.to_dict(orient="records")
# Append aggregated records to the result dictionary.
def append_data(agg_dict, result_dict=None):
    """Append each aggregated record to the column lists of a result dict.

    Args:
        agg_dict: Iterable of records (dicts), e.g. the output of
            ``group_combine``.
        result_dict: Mapping of column name to list that receives the
            values. Defaults to the module-level ``result`` dictionary.

    Every record appends exactly one value to every column; columns the
    record does not contain receive ``''``.
    """
    if result_dict is None:
        # Resolved at call time so the module-level dict is the default sink.
        result_dict = result
    for line in agg_dict:
        for element, values in result_dict.items():
            values.append(line.get(element, ''))
# Run the aggregation for every non-empty combination of dimensions and
# collect the records via append_data.
# NOTE(review): `elements` (presumably the list of dimension column names)
# and `df1` (presumably the input DataFrame) are not defined in the visible
# source — confirm they are set before this loop runs.
for number in range(len(elements)):
    # Combinations of size number + 1; materialised once for both the
    # debug print and the iteration below.
    combos = list(itertools.combinations(elements, number + 1))
    print(number)
    print(combos)
    for combination in combos:
        print(list(combination))
        print(metricx)
        agg_dict = group_combine(
            data=df1, dimension=list(combination), metric=metricx
        )
        append_data(agg_dict)