/*
 * 已知问题:本版本的文档相似度系统仍存在不足——计算出的相似率普遍偏高,不同文档之间的区分效果不明显;
 * 部分 doc 文档在读取时会报错。
 * 算法仍有待完善,后续会继续研究并解决上述问题。
 * 请耐心等候。
 */
import java.awt.BorderLayout;
import java.awt.Color;
import java.awt.GridLayout;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.File;
import java.text.DecimalFormat;
import javax.swing.JButton;
import javax.swing.JFileChooser;
import javax.swing.JFrame;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JTextArea;
import javax.swing.JTextField;
/**
 * Swing front end for comparing the textual similarity of documents.
 *
 * <p>The window offers three actions: pick "file 1", pick "file 2" (which is
 * immediately compared against file 1), and pick a folder whose files are
 * compared pairwise. Text extraction is delegated to {@code wordortxt} and the
 * similarity score to {@code CosineSimilarAlgorithm.getSimilarity}; both are
 * declared outside this file.
 *
 * <p>Note that although this class extends {@link JFrame}, the window that is
 * actually shown is the separate frame stored in {@link #jf}.
 */
@SuppressWarnings("serial")
public class Jframe extends JFrame implements ActionListener {

    /** Caption (and therefore action command) of the "choose file 1" button. */
    private static final String CMD_FILE_1 = "选择文件1";
    /** Caption (and therefore action command) of the "choose file 2" button. */
    private static final String CMD_FILE_2 = "选择文件2";
    /** Caption (and therefore action command) of the "compare whole folder" button. */
    private static final String CMD_FOLDER = "比较文件夹所有文件";
    /** The folder scan only reports pairs whose similarity is strictly above this value. */
    private static final double REPORT_THRESHOLD = 0.7;

    /*
     * Widgets and selection state. They are static and non-private in the
     * original code, so names, types and visibility are kept unchanged in case
     * other classes read them.
     */
    static JButton jb1;              // "choose file 1" button
    static JTextField jtf1;          // shows the path of file 1
    static JButton jb2;              // "choose file 2" button
    static JTextField jtf2;          // shows the path of file 2
    static JButton jb3;              // "compare whole folder" button
    static JTextField jtf3;          // shows the path of the chosen folder
    static JTextArea jta;            // result output area
    JScrollPane jsp;                 // scroll pane wrapping jta
    JFrame jf;                       // the window that is actually displayed
    static JFileChooser filechoose;  // chooser created for the most recent button click
    static File file;                // most recent confirmed selection, or null
    public static String path1;      // absolute path of file 1
    public static String path2;      // absolute path of file 2
    static String text2;             // extracted text of file 1
    static String text3;             // extracted text of file 2
    // Formats percentages with two decimals; the pattern's trailing space is
    // emitted as a literal suffix, i.e. before the "%" sign appended by callers.
    static DecimalFormat df = new DecimalFormat( "0.00 ");

    /** Builds and shows the comparison window. */
    public Jframe() {
        init();
    }

    /**
     * Creates all widgets, wires the button listeners and displays the frame.
     */
    public void init() {
        jf = new JFrame("文件相似度比较");

        // Top strip: one button + one path field per action (3 rows).
        JPanel controls = new JPanel();
        controls.setLayout(new GridLayout(3, 3, 5, 5));
        jb1 = new JButton(CMD_FILE_1);
        jtf1 = new JTextField(10);
        jb2 = new JButton(CMD_FILE_2);
        jtf2 = new JTextField(10);
        jb3 = new JButton(CMD_FOLDER);
        jtf3 = new JTextField(10);
        controls.add(jb1);
        controls.add(jtf1);
        controls.add(jb2);
        controls.add(jtf2);
        controls.add(jb3);
        controls.add(jtf3);

        // Result area with always-visible scroll bars.
        jta = new JTextArea(100, 35);
        jta.setLineWrap(true);
        jsp = new JScrollPane(jta, JScrollPane.VERTICAL_SCROLLBAR_ALWAYS,
                JScrollPane.HORIZONTAL_SCROLLBAR_ALWAYS);

        jb1.addActionListener(this);
        jb2.addActionListener(this);
        jb3.addActionListener(this);

        jf.setLayout(new BorderLayout());
        jf.add(controls, BorderLayout.NORTH);
        // The scroll pane goes straight into CENTER. The original first put it
        // into an intermediate panel and then re-added it to the frame, which
        // re-parented the scroll pane and left an orphan, never-laid-out panel
        // in the content pane.
        jf.add(jsp, BorderLayout.CENTER);
        jf.setBackground(Color.white);
        jf.setSize(350, 400);
        jf.setLocation(650, 350);
        jf.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        // Show the frame only once it is fully configured.
        jf.setVisible(true);
    }

    /**
     * Dispatches a button click to the matching action, identified by the
     * button's action command (its caption).
     *
     * @param e the click event
     */
    @Override
    public void actionPerformed(ActionEvent e) {
        // NOTE(review): these two instances are never used (only static methods
        // of both classes are called). They are kept because the constructors
        // are not visible from this file and might have side effects - confirm
        // and remove.
        @SuppressWarnings("unused")
        wordortxt world = new wordortxt();
        @SuppressWarnings("unused")
        CosineSimilarAlgorithm jisuan = new CosineSimilarAlgorithm();

        String command = e.getActionCommand();
        if (CMD_FILE_1.equals(command)) {
            handleFirstFile(promptForSelection(JFileChooser.FILES_ONLY));
        } else if (CMD_FILE_2.equals(command)) {
            handleSecondFile(promptForSelection(JFileChooser.FILES_ONLY));
        } else if (CMD_FOLDER.equals(command)) {
            File chosen = promptForSelection(JFileChooser.DIRECTORIES_ONLY);
            if (chosen != null && chosen.isDirectory()) {
                getFile1();
            }
        }
    }

    /**
     * Opens a fresh file chooser and records the outcome in {@link #filechoose}
     * and {@link #file}.
     *
     * @param selectionMode one of the {@link JFileChooser} selection-mode constants
     * @return the confirmed selection, or {@code null} when the dialog was
     *         cancelled or failed
     */
    private File promptForSelection(int selectionMode) {
        filechoose = new JFileChooser();
        filechoose.setFileSelectionMode(selectionMode);
        int outcome = filechoose.showOpenDialog(jf);
        // Only an approved dialog counts: a file that was merely highlighted
        // before pressing "Cancel" must not be processed.
        file = (outcome == JFileChooser.APPROVE_OPTION) ? filechoose.getSelectedFile() : null;
        return file;
    }

    /** Stores the path and extracted text of file 1. */
    private void handleFirstFile(File chosen) {
        if (chosen == null || !chosen.isFile()) {
            return;
        }
        path1 = chosen.getAbsolutePath();
        jtf1.setText(path1);
        text2 = judge(path1);
        if (text2 == null) {
            reportUnreadable(path1);
        }
    }

    /** Stores the path and text of file 2 and prints its similarity to file 1. */
    private void handleSecondFile(File chosen) {
        if (chosen == null || !chosen.isFile()) {
            return;
        }
        path2 = chosen.getAbsolutePath();
        jtf2.setText(path2);
        text3 = judge(path2);
        if (text3 == null) {
            reportUnreadable(path2);
            return;
        }
        if (text2 == null) {
            // File 1 was never chosen or could not be read; there is nothing to
            // compare against, so do not hand a null text to the algorithm.
            jta.append("请先选择一个可读取的文件1" + "\n");
            return;
        }
        double value = CosineSimilarAlgorithm.getSimilarity(text2, text3);
        jta.append("相似度:" + df.format(value*100) + "%" + "\n");
    }

    /** Tells the user that no text could be extracted from {@code path}. */
    private static void reportUnreadable(String path) {
        jta.append("无法读取文件: " + path + "\n");
    }

    /**
     * Compares every pair of readable files directly inside the directory
     * currently stored in {@link #file} and appends each pair whose similarity
     * exceeds {@link #REPORT_THRESHOLD} to the result area.
     *
     * <p>Each file is read once up front. Entries that are not regular files,
     * or for which {@link #judge(String)} yields no text, are skipped. The
     * texts are held in locals so that the file-1/file-2 state
     * ({@link #text2}, {@link #text3}) is not overwritten by a folder scan.
     */
    public static void getFile1() {
        if (file == null) {
            return;
        }
        String path = file.getAbsolutePath();
        File[] entries = file.listFiles();
        jtf3.setText(path);
        if (entries == null) {
            // listFiles() returns null when the path is not a directory or an
            // I/O error occurs.
            jta.append("无法读取文件夹: " + path + "\n");
            return;
        }
        System.out.println("文件个数" + entries.length);

        // Extract every text exactly once instead of once per pair.
        String[] texts = new String[entries.length];
        for (int i = 0; i < entries.length; i++) {
            texts[i] = entries[i].isFile() ? judge(entries[i].getAbsolutePath()) : null;
        }

        for (int i = 0; i < entries.length; i++) {
            if (texts[i] == null) {
                continue;
            }
            String nafile1 = entries[i].getName();
            for (int j = i + 1; j < entries.length; j++) {
                if (texts[j] == null) {
                    continue;
                }
                String nafile2 = entries[j].getName();
                double value = CosineSimilarAlgorithm.getSimilarity(texts[i], texts[j]);
                if (value > REPORT_THRESHOLD) {
                    jta.append("相似度:" + " " + nafile1 + " 与 " + " " +nafile2 + " "
                            +df.format(value*100)+ "%"+"\n");
                }
            }
        }
    }

    /**
     * Extracts the text of a file, choosing the reader by file extension
     * ({@code .doc}, {@code .txt} or {@code .pdf}, matched case-insensitively).
     *
     * @param str path of the file to read; may be {@code null}
     * @return whatever the matching {@code wordortxt} reader returns, or
     *         {@code null} when the path is {@code null}, the extension is not
     *         supported, or the reader throws
     */
    public static String judge(String str) {
        if (str == null) {
            return null;
        }
        try {
            if (hasExtension(str, ".doc")) {
                return wordortxt.readDoc(str);
            }
            if (hasExtension(str, ".txt")) {
                return wordortxt.readtxt(str);
            }
            if (hasExtension(str, ".pdf")) {
                return wordortxt.readPDF(str);
            }
        } catch (Exception ex) {
            // Best effort: callers treat null as "could not be read".
            System.err.println("Failed to read " + str);
            ex.printStackTrace();
        }
        return null;
    }

    /**
     * Tells whether {@code path} ends with {@code dotExt}, ignoring case.
     *
     * @param path   the path to test
     * @param dotExt the extension including its leading dot, e.g. {@code ".txt"}
     */
    private static boolean hasExtension(String path, String dotExt) {
        int len = dotExt.length();
        // regionMatches returns false for a negative offset, so paths shorter
        // than the extension are handled without an explicit length check.
        return path.regionMatches(true, path.length() - len, dotExt, 0, len);
    }
}