333333333

package com.yxcx.gettrain;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.SortedMap;
import java.util.TreeMap;
//创建训练集合与测试集合
public class CreateTrainAndTestSample {
// 根据包含非特征词的文档集生成只包含特征词的文档集到 trainSpecial 目
录下
ComputeWordsVector cwv = new ComputeWordsVector();
void trainSpecialWords() throws IOException {
File file2 = new
File("/data/classifydata/trainSpecial/" );
if(!file2.exists()){
file2.mkdir();
}
File file3 = new File("/data/classifydata/docVector/" );
if(!file3.exists()){
file3.mkdir();
}
// TODO Auto-generated method stub
String word;
String fileDir = "/data/classifydata/traindata/";
SortedMap<String, Double> wordMap = new TreeMap<String,
Double>();
wordMap = cwv.countWords(fileDir, wordMap);
// 把 wordMap 输出到文件
cwv.printWordMap(wordMap);
File[] sampleDir = new File(fileDir).listFiles();
for (int i = 0; i < sampleDir.length; i++) {
File[] sample = sampleDir[i].listFiles();
String targetDir =
"/data/classifydata/trainSpecial/"
+ sampleDir[i].getName();
File targetDirFile = new File(targetDir);
if (!targetDirFile.exists()) {
targetDirFile.mkdir();
}
for (int j = 0; j < sample.length; j++) {
String fileShortName =
sample[j].getName();
targetDir =
"/data/classifydata/trainSpecial/"
+ sampleDir[i].getName()
+ "/" + fileShortName;
FileWriter tgWriter = new
FileWriter(targetDir);
FileReader samReader = new
FileReader(sample[j]);
BufferedReader samBR = new
BufferedReader(samReader);
while ((word = samBR.readLine()) !=
null) {
if (wordMap.containsKey(word)) {
tgWriter.append(word +
"\n");
}
}
tgWriter.flush();
tgWriter.close();
}
}
}
public void NaiveBayesianClassifierMain(String[] args) throws 
Exception {
CreateTrainAndTestSample ctt = new
CreateTrainAndTestSample();
ctt.trainSpecialWords();
}
public static void main(String[] args) throws Exception {
DataPreProcess DataPP = new DataPreProcess();
DataPP.BPPMain(args);
CreateTrainAndTestSample ctt = new
CreateTrainAndTestSample();
ctt.trainSpecialWords();
}
}

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值