/**
* Trainer: holds the per-category document counts and word frequencies
* gathered from the training set.
*
* @author duyf
*
*/
class Train implements Serializable {
/**
* Serialization version identifier for this class.
*/
private static final long serialVersionUID = 1L;
// Path of the file from which getInstance() reads the serialized Train object.
public final static String SERIALIZABLE_PATH = "D:\\workspace\\Test\\SogouC.mini\\Sample\\Train.ser";
// Location of the training set.
private String trainPath = "D:\\workspace\\Test\\SogouC.mini\\Sample";
// Maps a category index to its actual name.
// NOTE(review): raw Map/HashMap - the generic type arguments appear to be missing; confirm against the original source.
private Map classMap = new HashMap();
// Number of txt documents for each category.
private Map classP = new ConcurrentHashMap();
// Total number of documents.
private AtomicInteger actCount = new AtomicInteger(0);
// Dictionary and word frequencies for each category.
// NOTE(review): "Map>" / "ConcurrentHashMap>" is not valid Java; the type arguments
// look like they were stripped (e.g. by HTML extraction) and must be restored before this compiles.
private Map> classWordMap = new ConcurrentHashMap>();
// Word segmenter (tokenizer). Transient, so it is not serialized; getInstance()
// assigns a new IkParticiple after deserialization.
private transient Participle participle;
// Instance most recently created or loaded; getInstance() reassigns it on every call.
private static Train trainInstance = new Train();
/**
 * Returns a trainer, restoring previously saved state when it is available.
 *
 * <p>On every call the file at {@link #SERIALIZABLE_PATH} is consulted: if it
 * exists and is non-empty, the serialized {@code Train} object is read from it
 * and given a new {@code IkParticiple} (the tokenizer field is transient and
 * therefore not part of the serialized form). If the file is missing, empty,
 * or cannot be read, a fresh, empty trainer is returned instead and the
 * failure is printed to standard error.
 *
 * <p>The returned instance is also stored in the static {@code trainInstance}
 * field, replacing whatever was there before.
 *
 * @return the restored trainer, or a new empty one when nothing could be loaded
 */
public static Train getInstance() {
    Train loaded = null;
    FileInputStream fis = null;
    ObjectInputStream ois = null;
    try {
        File serialized = new File(SERIALIZABLE_PATH);
        // File.length() is 0 for a missing file as well as for an empty one.
        if (serialized.length() != 0) {
            fis = new FileInputStream(serialized);
            ois = new ObjectInputStream(fis);
            Train candidate = (Train) ois.readObject();
            candidate.participle = new IkParticiple();
            // Publish only a fully initialised instance.
            loaded = candidate;
        }
    } catch (Exception e) {
        // Best effort: fall back to an empty trainer below.
        e.printStackTrace();
    } finally {
        // Release the file handle; closing the object stream also closes the
        // underlying file stream, which is closed directly only if wrapping failed.
        try {
            if (ois != null) {
                ois.close();
            } else if (fis != null) {
                fis.close();
            }
        } catch (Exception closeFailure) {
            closeFailure.printStackTrace();
        }
    }
    trainInstance = (loaded != null) ? loaded : new Train();
    return trainInstance;
}
/**
 * Creates an empty trainer equipped with a new {@code IkParticiple} word segmenter.
 * Private: instances are obtained through {@code getInstance()}.
 */
private Train() {
    participle = new IkParticiple();
}
public String readtxt(String path) {
BufferedReader br = null;
StringBuilder str = null;
try {
br =