项目:code
当当网泄露文件可以网上下载,或者给我留言。
一、实验说明
环境问题,如果在windows上不需要任何配置
如果在Linux上,下面以Ubuntu为例
c.让Ubuntu支持中文编码
在命令行中输入sudo vi /var/lib/locales/supported.d/local 后按下i键输入以下内容保存
zh_CN.GBK GBK
zh_CN.GB2312 GB2312
zh_CN.GB18030 GB18030
然后在命令中输入:sudo dpkg-reconfigure --force locales
配置Eclipse:Window->Preferences,然后选择General下面的Workspace,将Text file encoding设置为Other并选择GBK,如果下拉列表中没有GBK,直接输入GBK三个字母,最后点击Apply即可.
二、运行效果
1.先点击创建索引文件
2.使用索引文件检索
3.不使用索引文件检索
三、项目实现
本项目的实现包含三个类:RunApplication类(主程序类)、DataSearch类(检索算法实现类)和MainFrame类(界面布局类)。下面将从主程序类开始说明,重点说明检索算法实现类。
1.RunApplication类(主程序类)
package com.dangdang.serach;
/**
 * Program entry point: opens the search window over the data file.
 *
 * <p>The data file defaults to {@code E:\dangdang.txt}; a different location
 * (for example on Linux) can be supplied as the first command-line argument.
 *
 * @author hsc
 */
public class RunApplication {
    /** Data file used when no path is given on the command line. */
    private static final String DEFAULT_FILE = "E:\\dangdang.txt";

    /**
     * Starts the application.
     *
     * @param args optional; {@code args[0]}, when present, is the path of the
     *             data file to search instead of the built-in default
     */
    public static void main(String[] args) {
        final String path = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE;
        // Swing components are built and shown on the event dispatch thread.
        javax.swing.SwingUtilities.invokeLater(new Runnable() {
            public void run() {
                MainFrame f = new MainFrame(path);
                f.setVisible(true);
            }
        });
    }
}
2.主窗体类MainFrame
采用swing技术,构建简单窗体,方便观察实验结果。
package com.dangdang.serach;
import java.awt.Font;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.IOException;
import java.util.Vector;
import javax.swing.JButton;
import javax.swing.JCheckBox;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JOptionPane;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JTextArea;
import javax.swing.JTextField;
/**
 * Main window of the search demo.
 *
 * <p>Holds a name input field, a "search" button, an "index" check box that
 * selects between the indexed and the full-scan lookup, a "create index file"
 * button, and a scrollable text area listing the matching records. All work
 * is delegated to {@link DataSearch} and runs synchronously inside the button
 * listener.
 */
public class MainFrame extends JFrame implements ActionListener {
    private DataSearch ds;
    private JScrollPane sPanel;
    /** Records returned by the most recent successful search. */
    private Vector<?> vecResult;
    private JPanel panel;
    private JLabel lab1;
    private JButton searchBtn;
    private JButton createIndexBtn;
    private JTextField txt;
    private JCheckBox checkBox;
    private JTextArea area;

    /**
     * Builds the window and the search backend.
     *
     * @param file path of the data file handed to {@link DataSearch}
     */
    public MainFrame(String file) {
        super();
        this.setTitle("dangdang data search");
        // position and size; the layout below uses absolute coordinates,
        // so the frame is not resizable
        this.setBounds(300, 300, 800, 500);
        this.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        this.setResizable(false);
        mainInit();
        ds = new DataSearch(file);
        vecResult = new Vector<Object>();
    }

    /**
     * Creates the widgets, positions them with absolute bounds and registers
     * this frame as the listener of both buttons.
     */
    public void mainInit() {
        panel = new JPanel();
        // absolute positioning: every component gets explicit bounds
        panel.setLayout(null);

        lab1 = new JLabel("please input name:");
        lab1.setBounds(30, 10, 200, 30);

        txt = new JTextField();
        txt.setBounds(150, 10, 270, 30);

        searchBtn = new JButton("search");
        searchBtn.setBounds(440, 10, 80, 30);
        searchBtn.addActionListener(this);

        checkBox = new JCheckBox("index");
        checkBox.setBounds(530, 10, 70, 30);

        createIndexBtn = new JButton("create index file");
        createIndexBtn.setBounds(600, 10, 150, 30);
        createIndexBtn.addActionListener(this);

        area = new JTextArea("");
        sPanel = new JScrollPane(area);
        sPanel.setBounds(7, 45, 780, 400);

        panel.add(lab1);
        panel.add(txt);
        panel.add(sPanel);
        panel.add(checkBox);
        panel.add(searchBtn);
        panel.add(createIndexBtn);
        this.add(panel);
    }

    /**
     * Dispatches a button click to the search or the index-building action.
     *
     * @param event the click event; events from other sources are ignored
     */
    @Override
    public void actionPerformed(ActionEvent event) {
        Object source = event.getSource();
        if (source == searchBtn) {
            runSearch();
        } else if (source == createIndexBtn) {
            buildIndex();
        }
    }

    /** Runs a lookup for the entered name and shows the records and the elapsed time. */
    private void runSearch() {
        String searchName = txt.getText().trim();
        // names shorter than two characters cannot be mapped to an index bucket
        if (searchName.length() < 2) {
            JOptionPane.showMessageDialog(null,
                    "please enter the correct name!");
            return;
        }
        long start = System.currentTimeMillis();
        area.setText("");
        Vector<?> found;
        try {
            if (checkBox.isSelected()) {
                found = ds.selectUseIndex(searchName);
            } else {
                found = ds.select(searchName);
            }
        } catch (IOException e) {
            // Report the failure instead of presenting an empty or partial
            // result list as if the search had succeeded.
            e.printStackTrace();
            JOptionPane.showMessageDialog(null,
                    "search failed: " + e.getMessage(),
                    "error", JOptionPane.ERROR_MESSAGE);
            return;
        }
        if (found == null) {
            // selectUseIndex returns null for names it rejects
            found = new Vector<Object>();
        }
        vecResult = found;

        // assemble the text once instead of appending to the widget per record
        StringBuilder text = new StringBuilder();
        for (int i = 0; i < vecResult.size(); i++) {
            text.append(vecResult.get(i)).append("\n");
        }
        area.setText(text.toString());

        long end = System.currentTimeMillis();
        String tmp = "Total records:" + vecResult.size()
                + ",consuming time:" + (end - start) + "ms";
        JOptionPane.showMessageDialog(null, tmp);
    }

    /** Rebuilds the index files and shows the elapsed time. */
    private void buildIndex() {
        try {
            long start = System.currentTimeMillis();
            ds.createIndexFile();
            long end = System.currentTimeMillis();
            String tmp = "create index file " + "consuming time:"
                    + (end - start) + "ms";
            JOptionPane.showMessageDialog(null, tmp);
        } catch (IOException e) {
            e.printStackTrace();
            JOptionPane.showMessageDialog(null,
                    "create index file failed: " + e.getMessage(),
                    "error", JOptionPane.ERROR_MESSAGE);
        }
    }
}
接下来是本项目的重点和难点,检索算法设计与实现类
3.DataSearch类实现
主要根据姓名检索出相应的记录。
dd.txt 中数据格式:
hgqldjlxm@163.com,李晓明,皇姑区嫩江街44号甲,121,12,86267617,15940531082,29.00
首先,找到第一个和第二个逗号的位置,两者之间的内容就是姓名。
其次,就可以根据姓名与查询姓名进行匹配,还可以根据姓名建立索引文件。
检索方法:
a.使用普通方法检索,就是一行一行的读取文件,然后再判断该行信息是否符合要求。
b.根据姓名键值建立索引文件,然后查询的时候根据输入姓名键值转换对应到相应的索引文件中去找。
将姓名前两个字符的编码值相加,再对256求余,得到索引值(int index = (name.charAt(0) + name.charAt(1)) % 256;)。查找时,先用同样的方法根据查询姓名的前两个字符计算出索引值,然后只在对应的索引文件中进行查找。
下面给出检索算法实现类DataSearch 代码
package com.dangdang.serach;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Vector;
public class DataSearch {
private Vector searchResult;
private FileWriter[] writer;
private String file;
private static final int filewriterLength=256;
public DataSearch(String file) {
searchResult = new Vector();
writer = new FileWriter[filewriterLength];
this.file = file;
}
public Vector select(String searchName) throws IOException {
BufferedReader br = null;
String data = null;
long count =1;
searchResult.clear();
br = new BufferedReader(
new InputStreamReader(new FileInputStream(file)));
while ((data = br.readLine()) != null) {
String name = getName(data);
if (name == null) {
continue;
}
if (name.equals(searchName)) {
searchResult.add(data);
}
}
br.close();
// wr.close();
return searchResult;
}
public Vector selectUseIndex(String searchName) throws IOException {
if (searchName == null || searchName.length() < 2) {
return null;
}
int index = (searchName.charAt(0) + searchName.charAt(1)) % 256;
BufferedReader br = null;
String data = null;
searchResult.clear();
br = new BufferedReader(new InputStreamReader(new FileInputStream(
"./index/" + index + ".db")));
while ((data = br.readLine()) != null) {
String tempName = getName(data);
if (tempName != null) {
if (searchName.equals(tempName)) {
searchResult.add(data);
}
}
}
br.close();
return searchResult;
}
public void createIndexFile() throws IOException {
initIndexFile();
BufferedReader br = null;
String data = null;
br = new BufferedReader(
new InputStreamReader(new FileInputStream(file)));
while ((data = br.readLine()) != null) {
writeIndexFile(data);
}
br.close();
closeIndexFile();
}
private void writeIndexFile(String data) throws IOException {
String name = getName(data);
if (name != null) {
int index = (name.charAt(0) + name.charAt(1)) % 256;
writer[index].write(data + "\r\n"); //
}
}
/*
* accord to the every line data parse the name
*/
private String getName(String data) {
int start = data.indexOf(',');
int end = data.indexOf(',', start + 1);
if (end - start < 3) { //
return null;
}
return data.substring(start + 1, end);
}
private boolean initIndexFile() throws IOException {
File file = new File("index");
// if the folder is not exist but create;
if (!file.exists() && !file.isDirectory()) {
file.mkdir();
}
for (int i = 0; i < writer.length; i++) {
writer[i] = new FileWriter("./index/" + i + ".db", false);
}
return true;
}
private void closeIndexFile() throws IOException {
for (int i = 0; i < writer.length; i++) {
if (writer[i] != null) {
writer[i].close();
}
}
}
}
到此项目就完成了。
四.项目小结
1.通过该项目你可以学习到处理大数据的基本思路.
2.巩固java编程知识,文件操作
3.了解一些Ubuntu中文编码问题