数据挖掘概念学习:Candidate-Elimination(候选消除)算法的 Java 实现

Candidate-Eliminate算法类:

package CandidateEliminate; import java.util.ArrayList; import java.util.Map; /** * 候选消除算法 * @author Rowen * @qq 443773264 * @mail luowen3405@163.com */ public class CandidateEliminate { private int nCandAttr; //候选属性个数 private Map<Integer, ArrayList<String>> datas; //待训练的数据集 private ArrayList<ArrayList<String>> GH; //泛化概念集 private ArrayList<ArrayList<String>> SH; //特化概念集 public CandidateEliminate(int nCandAttr, Map<Integer, ArrayList<String>> datas) { this.nCandAttr = nCandAttr; this.datas = datas; GH = new ArrayList<ArrayList<String>>(); SH = new ArrayList<ArrayList<String>>(); initGHSH(); } public Map<Integer, ArrayList<String>> getDatas() { return datas; } public void setDatas(Map<Integer, ArrayList<String>> datas) { this.datas = datas; } public int getNCandAttr() { return nCandAttr; } public void setNCandAttr(int candAttr) { nCandAttr = candAttr; } /** * 初始化泛化概念集GH和特化概念集SH */ public void initGHSH() { ArrayList<String> maxSH = new ArrayList<String>(); for (int i = 0; i < nCandAttr; i++) { maxSH.add("-1"); //-1表示最特化 } ArrayList<String> maxGH = new ArrayList<String>(); for (int i = 0; i < nCandAttr; i++) { maxGH.add("?"); //?表示最泛化 } SH.add(maxSH); GH.add(maxGH); } /** * 判断一个概念是否能够覆盖另一个概念或训练元组 * @param h1 概念 * @param h2 概念或训练元组 * @return 能够覆盖则返回true,反之则返回false */ public boolean include(ArrayList<String> h1, ArrayList<String> h2){ boolean isInclude = true; String e1 = ""; String e2 = ""; for (int i = 0; i < nCandAttr; i++) { e1 = h1.get(i); e2 = h2.get(i); if(!e1.equals(e2) && !e1.equals("?")){ isInclude = false; break; } } return isInclude; } /** * 最小泛化一个概念 * @param sH 待泛化的概念 * @param data 当前训练元组 * @return 最小泛化后的概念 */ public ArrayList<String> minGeneralize(ArrayList<String> sH, ArrayList<String> data){ String h = ""; String e = ""; for (int i = 0; i < nCandAttr; i++) { h = sH.get(i); e = data.get(i); if (!h.equals(e)) { h = (h.equals("-1")) ? 
e : "?"; sH.set(i, h); } } return sH; } /** * 最小特化一个概念 * @param gH 待特化的概念 * @param data 当前训练元组 * @return 最小特化后的概念 */ public ArrayList<ArrayList<String>> minSpecialize(ArrayList<String> gH, ArrayList<String> data){ ArrayList<ArrayList<String>> minS = new ArrayList<ArrayList<String>>(); String h = ""; String e = ""; ArrayList<String> tempgH = null; ArrayList<String> dv = new ArrayList<String>(); for (int i = 0; i < nCandAttr; i++) { tempgH = new ArrayList<String>(gH); h = tempgH.get(i); e = data.get(i); if (!h.equals(e)) { if (h.equals("?")) { dv = attrDiffValues(i); for (int j = 0; j < dv.size(); j++) { if(!dv.get(j).equals(e)){ tempgH.set(i, dv.get(j)); minS.add(tempgH); } } } } } return minS; } /** * 获取某一指定属性列上的不同值(用于对泛化概念进行最小特化) * @param nAttrIndex 指定的属性列索引 * @return 指定属性列上的不同值集合 */ public ArrayList<String> attrDiffValues(int nAttrIndex){ ArrayList<String> dv = new ArrayList<String>(); String e = ""; for (int i = 0; i < datas.size(); i++) { e = datas.get(i).get(nAttrIndex); if (!dv.contains(e)) { dv.add(e); } } return dv; } /** * 检查最小泛化后的特化概念的泛化程度是否小于泛化集中的所有泛化概念 * @param sH 最小泛化后的特化概念 * @param tempGH 泛化概念集 * @return 是则返回true,否则返回false */ public boolean checkGeneralize(ArrayList<String> sH, ArrayList<ArrayList<String>> tempGH){ boolean isRight = true; for (int i = 0; i < tempGH.size(); i++) { ArrayList<String> gH = tempGH.get(i); if(!include(gH, sH)){ isRight = false; } } return isRight; } /** * 检查最小特化后的泛化概念的特化程度是否小于特化集中的所有特化概念 * @param gH 最小特化后的泛化概念 * @param tempSH 特化概念集 * @return 是则返回true,否则返回false */ public boolean checkSpecialize(ArrayList<String> gH, ArrayList<ArrayList<String>> tempSH){ boolean isRight = true; for (int i = 0; i < tempSH.size(); i++) { ArrayList<String> sH = tempSH.get(i); if(!include(gH, sH)){ isRight = false; } } return isRight; } /** * 对所有的训练元组挖掘概念集 * @return 所有挖掘到的特化概念集和泛化概念集的集合 */ public ArrayList<ArrayList<String>> miningConcepts(){ ArrayList<ArrayList<String>> total = new ArrayList<ArrayList<String>>(); ArrayList<String> data = null; 
for (int i = 0; i < datas.size(); i++) { data = datas.get(i); if (data.get(nCandAttr).toLowerCase().equals("yes")) { for (int j = 0; j < GH.size(); j++) { if (!include(GH.get(j), data)) { GH.remove(j); } } for (int j = 0; j < SH.size(); j++) { if(!include(SH.get(j), data)){ ArrayList<String> rm = SH.remove(j); ArrayList<String> mg = minGeneralize(rm, data); if (checkGeneralize(mg, GH)) { SH.add(mg); } } } } else { for (int j = 0; j < SH.size(); j++) { if (include(SH.get(j), data)) { SH.remove(j); } } for (int j = 0; j < GH.size(); j++) { if (include(GH.get(j), data)) { ArrayList<String> rm = GH.remove(j); ArrayList<ArrayList<String>> ms = minSpecialize(rm, data); for (int k = 0; k < ms.size(); k++) { if(checkSpecialize(ms.get(k), SH)){ GH.add(ms.get(k)); } } } } } } total.addAll(SH); total.addAll(GH); return total; } }

算法测试类:

package CandidateEliminate;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.StringTokenizer;

/**
 * Console driver for the candidate-elimination algorithm.
 *
 * @author Rowen
 * @qq 443773264
 * @mail luowen3405@163.com
 */
public class TestCandidateEliminate {

    /**
     * Reads training tuples from standard input, one per line.
     *
     * <p>Each line is split on whitespace: the first token is parsed as the
     * integer row key and the remaining tokens become the tuple. Reading
     * stops at the first blank line or at end of input.
     *
     * @param data container that receives the tuples
     * @return the same {@code data} map, now holding the tuples read
     * @throws IOException if reading from standard input fails
     */
    public Map<Integer, ArrayList<String>> readData(
            Map<Integer, ArrayList<String>> data) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));
        String str;
        // readLine() returns null at end of stream (piped or redirected
        // input), so test for null before looking at the content. A
        // whitespace-only line ends the input too; it has no key token.
        while ((str = reader.readLine()) != null && str.trim().length() > 0) {
            StringTokenizer tokenizer = new StringTokenizer(str);
            String key = tokenizer.nextToken();
            ArrayList<String> s = new ArrayList<String>();
            while (tokenizer.hasMoreTokens()) {
                s.add(tokenizer.nextToken());
            }
            data.put(Integer.parseInt(key), s);
        }
        return data;
    }

    /**
     * Program entry point: reads the training set from the console, mines the
     * concepts with five candidate attributes and prints one concept per line.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Map<Integer, ArrayList<String>> datas = new HashMap<Integer, ArrayList<String>>();
        TestCandidateEliminate t = new TestCandidateEliminate();
        try {
            t.readData(datas);
        } catch (IOException e) {
            System.out.println("IOException occurs when reading data!");
        }
        CandidateEliminate ce = new CandidateEliminate(5, datas);
        ArrayList<ArrayList<String>> concepts = ce.miningConcepts();
        for (ArrayList<String> concept : concepts) {
            for (String value : concept) {
                System.out.print(value + " ");
            }
            System.out.println();
        }
        System.out.println();
    }
}

测试数据:

(每行一条训练元组:行号、5 个属性值、类别标签;输入以空行结束)

0 Sunny Warm Normal Strong Warm Yes
1 Sunny Warm High Strong Warm Yes
2 Rainy Cold High Strong Warm No
3 Sunny Warm High Strong Cool Yes

概念挖掘测试结果:

Sunny Warm ? Strong ?
Sunny ? ? ? ?
? Warm ? ? ?

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值