/*
 * I wrote this implementation about a year ago; someone asked me for it
 * today, so I am posting it here. I no longer remember the details of the
 * algorithm, but the code should be reasonably clear. Some prior
 * familiarity with the STL is all that is needed.
 *
 * NOTE(review): the program derives a "core" and a "reduction set" of
 * attributes by comparing IND(...) partitions, which looks like rough-set
 * attribute reduction -- confirm with the author.
 *
 * Test data. Layout expected by read_data(): "<objects>/<attributes>", the
 * attribute names, then for every object its name followed by one value per
 * attribute. Tokens are whitespace separated, so line breaks are optional.
 *
 * 21/10
 *     X1 X2 X3 X4 X5 X6 X7 X8 X9 y
 * U1  c 6 y E m h h a m m
 * U2  c 6 n E m m h ma m m
 * U3  c 6 n E m h h ma m m
 * U4  c 4 y E m h h ma l h
 * U5  c 6 n E m m m ma m m
 * U6  c 6 n B m m m a he lo
 * U7  c 6 n E m m h ma he lo
 * U8  s 4 n B sm h lo ma l h
 * U9  c 4 n B sm h lo ma m m
 * U10 c 4 n B sm h m a m m
 * U11 s 4 n E sm h lo ma l h
 * U12 s 4 n E m m m ma m h
 * U13 c 4 n B m m m ma m m
 * U14 s 4 y E sm h h ma m h
 * U15 s 4 n B sm m lo ma m h
 * U16 c 4 y E m m h ma m m
 * U17 c 6 n E m m h a m m
 * U18 c 4 n E m m h a m m
 * U19 s 4 n E sm h m ma m h
 * U20 c 4 n E sm h m ma m h
 * U21 c 4 n B sm h m ma m m
 */
#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <set>
#include <string>
#include <vector>

using namespace std;

#define DATA_FILE_NAME "../data.txt"
#define DEF_ATTR_CNT 10   // initial capacity hint for attribute containers
#define DEF_OBJ_CNT 21    // initial capacity hint for the object container

int g_attr_cnt = 0;   // number of attributes per object
int g_obj_cnt = 0;    // number of objects held in g_obj_set

class object;

vector<string> g_attr_set;                  // attribute names, in file order
vector<bool> g_mask;                        // g_mask[i]: attribute i takes part in object::operator==
vector<object *> g_obj_set;                 // owning pointers; released by clear_data()
vector<string> g_core_set;                  // attributes found by core()
vector<string> g_b_set;                     // attribute set B grown by core() and reduction()
vector<vector<object *> * > g_ind_c_set;    // IND(C): partition under all attributes
vector<vector<object *> * > g_ind_b_set;    // IND(B): partition under g_b_set

// Separator line printed between the output sections.
static void print_separator()
{
    cout << "-------------------------------------------------------------------------" << "\n";
}

// Returns true when `attr` occurs in `attrs`.
static bool contains_attr(const vector<string> &attrs, const string &attr)
{
    return find(attrs.begin(), attrs.end(), attr) != attrs.end();
}

// Prints `attrs` as "{a, b, c}" followed by a newline.
static void print_attr_list(const vector<string> &attrs)
{
    cout << "{";
    for (size_t i = 0; i < attrs.size(); i++) {
        if (i > 0)
            cout << ", ";
        cout << attrs[i];
    }
    cout << "}\n";
}

// Enables every attribute in the mask except those named `attr`.
void clr_mask(const string &attr)
{
    for (int i = 0; i < g_attr_cnt; i++)
        g_mask[i] = (attr != g_attr_set[i]);
}

// Enables every attribute in the mask.
void set_mask()
{
    for (int i = 0; i < g_attr_cnt; i++)
        g_mask[i] = true;
}

// Enables exactly the attributes whose names are listed in `str_set`.
void set_mask_set(vector<string> &str_set)
{
    for (int i = 0; i < g_attr_cnt; i++)
        g_mask[i] = contains_attr(str_set, g_attr_set[i]);
}

// One row of the data table: a name plus one value per attribute.
class object
{
public:
    vector<string> attr_val;   // attr_val[i] is the value of g_attr_set[i]
    string obj_name;

public:
    object(const string &name) : obj_name(name)
    {
        attr_val.reserve(DEF_ATTR_CNT);
    }

    // Two objects compare equal when they agree on every attribute that is
    // currently enabled in g_mask; the result therefore depends on the
    // global mask at the time of the call.
    bool operator == (const object &r_obj) const
    {
        for (int i = 0; i < g_attr_cnt; i++) {
            if (g_mask[i] && attr_val[i] != r_obj.attr_val[i])
                return false;
        }
        return true;
    }
};

// Loads DATA_FILE_NAME into g_attr_cnt, g_attr_set, g_mask, g_obj_set and
// g_obj_cnt.
//
// Problems are reported on stderr instead of being silently ignored:
//  - if the file cannot be opened, or the header / attribute names are
//    malformed, nothing is loaded and the globals are left untouched;
//  - if the object list is truncated, only the complete objects are kept
//    and g_obj_cnt reflects how many were actually read.
void read_data()
{
    ifstream data_in(DATA_FILE_NAME, ifstream::in);
    if (!data_in) {
        cerr << "read_data: cannot open " << DATA_FILE_NAME << "\n";
        return;
    }

    // Header token: "<object count>/<attribute count>".
    string token;
    data_in >> token;
    const string::size_type slash = token.find('/');
    if (!data_in || slash == string::npos) {
        cerr << "read_data: missing \"<objects>/<attributes>\" header\n";
        return;
    }
    const int obj_cnt = atoi(token.substr(0, slash).c_str());
    const int attr_cnt = atoi(token.substr(slash + 1).c_str());
    if (obj_cnt < 0 || attr_cnt < 0) {
        cerr << "read_data: negative count in header \"" << token << "\"\n";
        return;
    }

    // Read the attribute names into a local first so that a short file does
    // not leave the globals half initialised.
    vector<string> attr_names;
    attr_names.reserve(DEF_ATTR_CNT);
    for (int i = 0; i < attr_cnt; i++) {
        if (!(data_in >> token)) {
            cerr << "read_data: expected " << attr_cnt
                 << " attribute names, found " << i << "\n";
            return;
        }
        attr_names.push_back(token);
    }

    g_attr_set.reserve(DEF_ATTR_CNT);
    g_mask.reserve(DEF_ATTR_CNT);
    g_obj_set.reserve(DEF_OBJ_CNT);

    g_attr_cnt = attr_cnt;
    for (int i = 0; i < attr_cnt; i++) {
        g_attr_set.push_back(attr_names[i]);
        g_mask.push_back(true);
    }

    int loaded = 0;
    for (; loaded < obj_cnt; loaded++) {
        if (!(data_in >> token))
            break;
        object *p_obj = new object(token);
        int j = 0;
        for (; j < attr_cnt; j++) {
            if (!(data_in >> token))
                break;
            p_obj->attr_val.push_back(token);
        }
        if (j < attr_cnt) {
            // Incomplete row: operator== indexes every attribute, so such
            // an object must not enter g_obj_set.
            delete p_obj;
            break;
        }
        g_obj_set.push_back(p_obj);
    }
    if (loaded < obj_cnt) {
        cerr << "read_data: expected " << obj_cnt << " objects, read "
             << loaded << "\n";
    }
    g_obj_cnt = loaded;
}

// Prints the full table: counts, attribute names, then one row per object.
void print_data()
{
    cout << g_obj_cnt << "/" << g_attr_cnt << " ";
    for (int i = 0; i < g_attr_cnt; i++) {
        cout << setw(4) << setiosflags(ios::left) << g_attr_set[i];
        if (i < g_attr_cnt - 1)
            cout << " ";
    }
    cout << "\n";
    print_separator();

    for (int i = 0; i < g_obj_cnt; i++) {
        cout << setw(3) << g_obj_set[i]->obj_name << " ";
        for (int j = 0; j < g_attr_cnt; j++) {
            cout << setw(3) << setiosflags(ios::left) << g_obj_set[i]->attr_val[j];
            if (j < g_attr_cnt - 1)
                cout << setw(3) << " ";
        }
        cout << "\n";
    }
}

// Frees every object owned by g_obj_set and empties the container.
// Partitions built earlier still hold the (now dangling) pointers; they must
// only be released (see app_exit), not dereferenced, after this call.
void clear_data()
{
    for (size_t i = 0; i < g_obj_set.size(); i++)
        delete g_obj_set[i];
    g_obj_set.clear();
    g_obj_cnt = 0;
}

// Frees every equivalence class of `ind_set` and leaves it empty.
// The objects referenced by the classes are not owned and are not deleted.
void clear_ind_set(vector<vector<object *> * > &ind_set)
{
    while (!ind_set.empty()) {
        delete ind_set.back();
        ind_set.pop_back();
    }
}

// Releases the global partitions.
void app_exit()
{
    clear_ind_set(g_ind_c_set);
    clear_ind_set(g_ind_b_set);
}

// Adds `p_obj` to the class of `ind_set` whose first member compares equal
// to it under the current mask, or starts a new class when none matches.
void add_obj_to_ind_set(vector<vector<object *> * > &ind_set, object *p_obj)
{
    for (size_t i = 0; i < ind_set.size(); i++) {
        vector<object *> &members = *ind_set[i];
        // Every class is created with one member, so front() is valid.
        if (*p_obj == *members.front()) {
            members.push_back(p_obj);
            return;
        }
    }
    ind_set.push_back(new vector<object *>(1, p_obj));
}

// Returns true when both classes have the same size and every pointer of
// `l_obj_set` also occurs in `r_obj_set` (identity comparison, not value).
bool obj_set_equal(vector<object *> &l_obj_set, vector<object *> &r_obj_set)
{
    if (l_obj_set.size() != r_obj_set.size())
        return false;
    for (size_t i = 0; i < l_obj_set.size(); i++) {
        if (find(r_obj_set.begin(), r_obj_set.end(), l_obj_set[i]) == r_obj_set.end())
            return false;
    }
    return true;
}

// Returns true when both partitions have the same number of classes and
// every class of `l_ind_set` has an equal class in `r_ind_set`.
bool ind_set_equal(vector<vector<object *> * > &l_ind_set, vector<vector<object *> * > &r_ind_set)
{
    if (l_ind_set.size() != r_ind_set.size())
        return false;
    for (size_t i = 0; i < l_ind_set.size(); i++) {
        bool matched = false;
        for (size_t j = 0; j < r_ind_set.size() && !matched; j++)
            matched = obj_set_equal(*l_ind_set[i], *r_ind_set[j]);
        if (!matched)
            return false;
    }
    return true;
}

// Builds IND(C) in g_ind_c_set, then collects into g_core_set every
// attribute whose removal changes that partition. The result is printed and
// appended to g_b_set as the starting point for reduction().
void core()
{
    // IND(C): partition under all attributes.
    set_mask();
    for (int i = 0; i < g_obj_cnt; i++)
        add_obj_to_ind_set(g_ind_c_set, g_obj_set[i]);

    vector<vector<object *> * > ind_without_attr;
    for (int i = 0; i < g_attr_cnt; i++) {
        // IND(C - {xi}): same partition with attribute i masked out.
        clear_ind_set(ind_without_attr);
        clr_mask(g_attr_set[i]);
        for (int j = 0; j < g_obj_cnt; j++)
            add_obj_to_ind_set(ind_without_attr, g_obj_set[j]);

        if (!ind_set_equal(g_ind_c_set, ind_without_attr))
            g_core_set.push_back(g_attr_set[i]);
    }
    clear_ind_set(ind_without_attr);

    print_separator();
    cout << "core:\n";
    print_attr_list(g_core_set);

    // B starts out as the core.
    g_b_set.insert(g_b_set.end(), g_core_set.begin(), g_core_set.end());
}

// Returns true when both lists have the same length and every name of
// `l_attr_set` occurs in `r_attr_set`.
bool attr_set_equal(vector<string> &l_attr_set, vector<string> &r_attr_set)
{
    if (l_attr_set.size() != r_attr_set.size())
        return false;
    for (size_t i = 0; i < l_attr_set.size(); i++) {
        if (!contains_attr(r_attr_set, l_attr_set[i]))
            return false;
    }
    return true;
}

// Appends to `res_set` every name of `l_attr_set` that is absent from
// `r_attr_set`, preserving the order of `l_attr_set`.
void attr_set_sub(vector<string> &l_attr_set, vector<string> &r_attr_set, vector<string> &res_set)
{
    for (size_t i = 0; i < l_attr_set.size(); i++) {
        if (!contains_attr(r_attr_set, l_attr_set[i]))
            res_set.push_back(l_attr_set[i]);
    }
}

// Sets the mask to `mask_set` and appends the resulting partition of all
// objects to `res_ind_set` (which is not cleared first).
void get_ind_set(vector<string> &mask_set, vector<vector<object *> * > &res_ind_set)
{
    set_mask_set(mask_set);
    for (int i = 0; i < g_obj_cnt; i++)
        add_obj_to_ind_set(res_ind_set, g_obj_set[i]);
}

// Replaces the contents of `dst_attr_set` with those of `src_attr_set`.
// Plain assignment is also safe when both arguments are the same vector.
void attr_set_copy(vector<string> &dst_attr_set, vector<string> &src_attr_set)
{
    dst_attr_set = src_attr_set;
}

// Grows g_b_set until IND(B) equals IND(C) or B contains every attribute.
// Each round adds the attribute outside B that yields the largest increase
// in the number of classes of IND(B); ties keep the earliest candidate.
// Expects core() to have populated g_ind_c_set and g_b_set. Prints the
// resulting set.
void reduction()
{
    vector<string> candidate_set;
    vector<string> c_sub_b;
    vector<vector<object *> * > candidate_ind;

    // IND(B)
    get_ind_set(g_b_set, g_ind_b_set);

    while (!ind_set_equal(g_ind_c_set, g_ind_b_set)
           && !attr_set_equal(g_b_set, g_attr_set)) {
        c_sub_b.clear();
        attr_set_sub(g_attr_set, g_b_set, c_sub_b);
        if (c_sub_b.empty())
            break;   // nothing left to add; avoids indexing an empty list

        size_t best_idx = 0;
        int best_sig = 0;
        for (size_t i = 0; i < c_sub_b.size(); i++) {
            attr_set_copy(candidate_set, g_b_set);
            candidate_set.push_back(c_sub_b[i]);
            clear_ind_set(candidate_ind);
            get_ind_set(candidate_set, candidate_ind);

            const int sig = static_cast<int>(candidate_ind.size())
                          - static_cast<int>(g_ind_b_set.size());
            if (i == 0 || sig > best_sig) {
                best_sig = sig;
                best_idx = i;
            }
        }
        g_b_set.push_back(c_sub_b[best_idx]);

        // Refresh IND(B) so the loop condition and the global stay in step
        // with g_b_set.
        clear_ind_set(g_ind_b_set);
        get_ind_set(g_b_set, g_ind_b_set);
    }
    clear_ind_set(candidate_ind);

    print_separator();
    cout << "reduction set:\n";
    print_attr_list(g_b_set);
}

// Prints the table restricted to the attributes in g_b_set.
void print_red_data()
{
    set_mask_set(g_b_set);

    print_separator();
    cout << " ";
    for (int i = 0; i < g_attr_cnt; i++) {
        if (!g_mask[i])
            continue;
        cout << setw(4) << setiosflags(ios::left) << g_attr_set[i];
        if (i < g_attr_cnt - 1)
            cout << " ";
    }
    cout << "\n";
    print_separator();

    for (int i = 0; i < g_obj_cnt; i++) {
        cout << setw(3) << g_obj_set[i]->obj_name << " ";
        for (int j = 0; j < g_attr_cnt; j++) {
            if (!g_mask[j])
                continue;
            cout << setw(3) << setiosflags(ios::left) << g_obj_set[i]->attr_val[j];
            if (j < g_attr_cnt - 1)
                cout << setw(3) << " ";
        }
        cout << "\n";
    }
}

int main()
{
    read_data();
    print_data();
    core();
    reduction();
    print_red_data();
    clear_data();
    app_exit();
    return 0;
}