KWIC(Key Word In Context)索引系统接受若干行文本,每行由若干个单词组成,每个单词由若干字符组成。每一行都可以循环移位:重复地把第一个单词删除,然后接到行末。KWIC 把所有行的各种移位结果按照字母表顺序输出。
在网上找了一个基于管道-过滤器风格的实现,但发现其中好像有错误,于是修改了一下使之正确,以下是代码:
Filter类
package com.jason.kwic;
import java.io.IOException;
/**
 * Base class for one stage of the pipe-and-filter pipeline.
 *
 * <p>A filter reads from {@link #input}, transforms the data in
 * {@link #transform()} and writes the result to {@link #output}. It can be
 * executed on its own thread through {@link #start()}.
 */
public abstract class Filter implements Runnable {
    /** Pipe this filter reads from; may be null for a filter that has no upstream pipe. */
    protected Pipe input;
    /** Pipe this filter writes to; may be null for a filter that has no downstream pipe. */
    protected Pipe output;
    /** Set once the worker thread has been launched, so it is launched only once. */
    private boolean isStart = false;

    /**
     * @param input  pipe to read from
     * @param output pipe to write to
     */
    Filter(Pipe input, Pipe output) {
        this.input = input;
        this.output = output;
    }

    /**
     * Starts the filter on a new thread. Calls after the first one are ignored.
     * The method is synchronized so that two concurrent callers cannot both
     * pass the {@code isStart} check and launch two threads.
     */
    public synchronized void start() {
        if (!isStart) {
            isStart = true;
            Thread thread = new Thread(this);
            thread.start();
        }
    }

    /**
     * Thread entry point: runs {@link #transform()} and reports an I/O failure
     * on standard error instead of silently discarding it.
     */
    @Override
    public void run() {
        try {
            this.transform();
        } catch (IOException e) {
            // run() cannot throw a checked exception, so make the failure visible.
            System.err.println(getClass().getName() + " failed: " + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Converts the input data into the required form and writes it to the
     * output pipe. Implemented by each concrete filter.
     *
     * @throws IOException if reading or writing fails
     */
    protected abstract void transform() throws IOException;
}
Pipe类
package com.jason.kwic;
import java.io.IOException;
import java.io.PipedReader;
import java.io.PipedWriter;
import java.io.PrintWriter;
import java.util.Scanner;
/**
 * A line-oriented pipe built from a connected {@link PipedWriter} /
 * {@link PipedReader} pair. One side writes lines with
 * {@link #writerLine(String)}, the other reads them with {@link #readerLine()}.
 */
public class Pipe {
    /** Reading end of the pipe. */
    private final Scanner pipereader;
    /** Writing end of the pipe. */
    private final PrintWriter pipewriter;

    /**
     * Creates a pipe whose two ends are already connected.
     *
     * @throws IllegalStateException if the two ends cannot be connected
     */
    public Pipe() {
        PipedWriter pw = new PipedWriter();
        PipedReader pr = new PipedReader();
        try {
            pw.connect(pr);
        } catch (IOException e) {
            // An unconnected pipe is unusable; fail here rather than on first use.
            throw new IllegalStateException("Unable to connect the pipe ends", e);
        }
        pipewriter = new PrintWriter(pw);
        pipereader = new Scanner(pr);
    }

    /**
     * Reads the next line from the pipe.
     *
     * @return the line without its terminator, or {@code null} when no more
     *         lines are available
     * @throws IOException if the underlying reader failed; Scanner records
     *         such failures instead of throwing them, so they are re-thrown here
     */
    public String readerLine() throws IOException {
        if (pipereader.hasNextLine()) {
            return pipereader.nextLine();
        }
        IOException failure = pipereader.ioException();
        if (failure != null) {
            throw failure;
        }
        return null;
    }

    /**
     * Writes one line to the pipe.
     *
     * @param strline the line to write
     * @throws IOException if the write failed; PrintWriter records failures
     *         instead of throwing them, so the error flag is checked here
     */
    public void writerLine(String strline) throws IOException {
        pipewriter.println(strline);
        // checkError() also flushes the writer.
        if (pipewriter.checkError()) {
            throw new IOException("Failed to write a line to the pipe");
        }
    }

    /**
     * Closes the reading end. No data can be read from the pipe afterwards.
     *
     * @throws IOException declared for callers; not thrown by the current implementation
     */
    public void closeReader() throws IOException {
        pipereader.close();
    }

    /**
     * Flushes pending data and closes the writing end. No data can be written
     * to the pipe afterwards.
     *
     * @throws IOException if flushing the remaining data failed
     */
    public void closeWriter() throws IOException {
        // checkError() flushes an open writer and is a no-op flush on a closed one,
        // so calling closeWriter() twice stays harmless.
        boolean failed = pipewriter.checkError();
        pipewriter.close();
        if (failed) {
            throw new IOException("Failed to flush the pipe before closing it");
        }
    }
}
Input类:
package com.jason.kwic;
import java.io.File;
import java.io.IOException;
import java.util.Scanner;
/**
 * Source filter: reads the input file line by line and writes every line to
 * the output pipe.
 */
public class Input extends Filter {
    /** File the lines are read from. */
    private File infile;

    /**
     * @param file   file to read
     * @param output pipe that receives the lines of the file
     */
    Input(File file, Pipe output) {
        super(null, output);
        this.infile = file;
    }

    /**
     * Copies every line of the file to the output pipe, then closes the pipe's
     * writing end.
     *
     * @throws IOException if the file cannot be opened or read, or the pipe
     *         cannot be written
     */
    @Override
    protected void transform() throws IOException {
        try {
            Scanner sc = new Scanner(infile);
            try {
                while (sc.hasNextLine()) {
                    output.writerLine(sc.nextLine());
                }
                // Scanner records read errors instead of throwing them.
                IOException readFailure = sc.ioException();
                if (readFailure != null) {
                    throw readFailure;
                }
            } finally {
                sc.close();
            }
        } finally {
            // Always signal end-of-stream, even when the file could not be read,
            // so the next filter is not left waiting for data.
            output.closeWriter();
        }
    }
}
Shift类:
package com.jason.kwic;
import java.io.IOException;
import java.util.ArrayList;
/**
 * Circular-shift filter: for every input line it emits all circular shifts of
 * the line's words, one shift per output line.
 */
public class Shift extends Filter {

    /**
     * @param input  pipe that delivers the original lines
     * @param output pipe that receives the shifted lines
     */
    Shift(Pipe input, Pipe output) {
        super(input, output);
    }

    /**
     * Reads lines until the input pipe is exhausted and writes every circular
     * shift of each line to the output pipe. Both pipe ends are closed on
     * exit, including when an exception is thrown.
     *
     * @throws IOException if reading or writing a pipe fails
     */
    @Override
    protected void transform() throws IOException {
        try {
            try {
                String line;
                while ((line = input.readerLine()) != null) {
                    for (String shifted : rotations(splitWords(line))) {
                        output.writerLine(shifted);
                    }
                }
            } finally {
                input.closeReader();
            }
        } finally {
            output.closeWriter();
        }
    }

    /**
     * Splits a line into words separated by the space character. Empty tokens
     * produced by leading, trailing or repeated spaces are dropped so they do
     * not show up as blank "words" in the shifts.
     */
    private static ArrayList<String> splitWords(String line) {
        ArrayList<String> words = new ArrayList<String>();
        StringBuilder current = new StringBuilder();
        for (int i = 0; i < line.length(); i++) {
            char c = line.charAt(i);
            if (c != ' ') {
                current.append(c);
            } else if (current.length() > 0) {
                words.add(current.toString());
                current.setLength(0);
            }
        }
        if (current.length() > 0) {
            words.add(current.toString());
        }
        return words;
    }

    /**
     * Builds all circular shifts of the given words. The shift starting at the
     * last word comes first and the unshifted line comes last. Words are joined
     * by single spaces with no trailing space.
     */
    private static ArrayList<String> rotations(ArrayList<String> words) {
        int count = words.size();
        ArrayList<String> lines = new ArrayList<String>(count);
        for (int start = count - 1; start >= 0; start--) {
            StringBuilder shifted = new StringBuilder();
            for (int offset = 0; offset < count; offset++) {
                if (offset > 0) {
                    shifted.append(' ');
                }
                shifted.append(words.get((start + offset) % count));
            }
            lines.add(shifted.toString());
        }
        return lines;
    }
}
Alphabetizer类:
package com.jason.kwic;
import java.io.IOException;
//import java.text.Collator;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
//import java.util.Locale;
public class Alphabetizer extends Filter{
private ArrayList<String> al = new ArrayList<String>();
Alphabetizer(Pipe input, Pipe output){
super(input, output);
}
//对读入的数据进行排序
protected void transform() throws IOException {
String templine = null;
//读入数据
while((templine = input.readerLine()) != null){
al.add(templine);
}
//按字母表排序
Collections.sort(al, new AlphaabetizerComparator());
//对排序后的数据进行输出
for(int i = 0; i < al.size(); i++){
output.writerLine(al.get(i));
}
input.closeReader();
output.closeWriter();
}
//使用java提供的Collator类来实现比较
// private class AlphaabetizerComparator implements Comparator<String> {
//
// private Collator collator;
// AlphaabetizerComparator(){
// this.collator = Collator.getInstance(Locale.ENGLISH);
// }
//
// @Override
// public int compare(String o1, String o2) {
// return this.collator.compare(o1, o2);
// }
//
// }
//自己写代码实现比较(使用字母的ascii值来进行比较)
private class AlphaabetizerComparator implements Comparator<String> {
@Override
public int compare(String o1, String o2) {
if (o1 == null || o2 == null) {
throw new NullPointerException();
}
int compareValue = 0;
char o1FirstCharacter = o1.charAt(0);
char o2FirstCharacter = o2.charAt(0);
if(this.isLetter(o1FirstCharacter) && this.isLetter(o2FirstCharacter)) {
//如果是小写的字母的值,则转成对应的大写的字母的值
o1FirstCharacter = this.toUpperCase(o1FirstCharacter);
o2FirstCharacter = this.toUpperCase(o2FirstCharacter);
compareValue = o1FirstCharacter - o2FirstCharacter;
} else {
throw new RuntimeException("必须是字母");
}
return compareValue;
}
private boolean isLetter(char c) {
return (c >= 65 && c <= 90) || (c >= 97 && c <= 122);
}
private char toUpperCase(char c) {
if (Character.isLowerCase(c)) {
return Character.toUpperCase(c);
}
return c;
}
}
}
Output类:
package com.jason.kwic;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
public class Output extends Filter{
//输出文件的文件名
private File file;
Output(Pipe input, File file){
super(input, null);
this.file = file;
}
//输出数据
protected void transform() throws IOException {
PrintWriter pw = new PrintWriter(file);
String templine = "";
while((templine = input.readerLine()) != null){
pw.write(templine);
pw.write("\n");
}
pw.flush();
pw.close();
input.closeReader();
}
}
Main主程序
package com.jason.kwic;
import java.io.File;
import java.util.Scanner;
/**
 * Entry point: wires Input -> Shift -> Alphabetizer -> Output together with
 * three pipes, runs the pipeline and prints the input and output files.
 */
public class Main {

    /**
     * Runs the KWIC pipeline.
     *
     * @param args optional: {@code args[0]} is the input file path and
     *             {@code args[1]} the output file path; the original
     *             hard-coded paths are used when they are omitted
     */
    public static void main(String[] args) {
        File infile = new File(args.length > 0 ? args[0] : "d:\\temp\\mykwic_in.txt");
        File outfile = new File(args.length > 1 ? args[1] : "d:\\temp\\mykwic_out.txt");
        try {
            // Three pipes connect the four filters.
            Pipe pipe1 = new Pipe();
            Pipe pipe2 = new Pipe();
            Pipe pipe3 = new Pipe();
            StageRunner[] runners = {
                new StageRunner(new Input(infile, pipe1), null, pipe1),
                new StageRunner(new Shift(pipe1, pipe2), pipe1, pipe2),
                new StageRunner(new Alphabetizer(pipe2, pipe3), pipe2, pipe3),
                new StageRunner(new Output(pipe3, outfile), pipe3, null)
            };

            // Each filter needs its own thread: a piped stream has a bounded
            // buffer, so a filter that writes more than the buffer holds blocks
            // until another thread reads. Running the filters one after another
            // on a single thread therefore hangs on larger inputs.
            Thread[] workers = new Thread[runners.length];
            for (int i = 0; i < runners.length; i++) {
                workers[i] = new Thread(runners[i]);
                workers[i].start();
            }
            // Wait for completion instead of sleeping for a guessed duration.
            for (Thread worker : workers) {
                worker.join();
            }
            for (StageRunner runner : runners) {
                if (runner.failure != null) {
                    throw runner.failure;
                }
            }

            System.out.println("----- infile -----");
            printLines(infile);
            System.out.println("input end");
            System.out.println("----- outfile -----");
            // The output file is opened only now, after the pipeline has written it.
            printLines(outfile);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Prints every line of the file to standard output and closes the file. */
    private static void printLines(File file) throws java.io.FileNotFoundException {
        Scanner sc = new Scanner(file);
        try {
            while (sc.hasNextLine()) {
                System.out.println(sc.nextLine());
            }
        } finally {
            sc.close();
        }
    }

    /**
     * Runs one filter and remembers its failure, if any. Whatever happens, the
     * pipe ends owned by the filter are closed afterwards so that neighbouring
     * filters see end-of-stream or a closed pipe instead of waiting forever.
     */
    private static final class StageRunner implements Runnable {
        private final Filter stage;
        private final Pipe upstream;
        private final Pipe downstream;
        /** Read by main only after join(), which makes the write visible. */
        private Exception failure;

        StageRunner(Filter stage, Pipe upstream, Pipe downstream) {
            this.stage = stage;
            this.upstream = upstream;
            this.downstream = downstream;
        }

        @Override
        public void run() {
            try {
                stage.transform();
            } catch (Exception e) {
                failure = e;
            } finally {
                closeQuietly();
            }
        }

        /** Best-effort cleanup; the filter normally closed these ends already. */
        private void closeQuietly() {
            try {
                if (upstream != null) {
                    upstream.closeReader();
                }
            } catch (java.io.IOException ignored) {
                // Cleanup only; the filter's own failure is what gets reported.
            }
            try {
                if (downstream != null) {
                    downstream.closeWriter();
                }
            } catch (java.io.IOException ignored) {
                // Cleanup only; the filter's own failure is what gets reported.
            }
        }
    }
}
注意其中的infile和outfile对应的路径要修改成实际的路径,并且必须存在这两个文件。