一个关键字标红的通用类

import java.lang.reflect.Array;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

/**
 * Wraps every occurrence of a registered keyword in a text with a begin/end tag.
 *
 * <p>The keywords are compiled into a character tree by {@code MakeLibrary}, and the
 * text is scanned with forward maximum matching: at every position the longest
 * keyword starting there is tagged, otherwise the character is copied unchanged.
 * Keywords may be added over several calls and one instance may tag several texts.
 *
 * @author Ansj
 */
public class TagWord {
    // Node status: a keyword ends here, but a longer keyword continues through this node.
    private static final int STATUS_WORD_CONTINUES = 2;
    // Node status: a keyword ends here and nothing longer continues.
    private static final int STATUS_WORD_END = 3;

    private final String tagBegin;
    private final String tagEnd;
    // Root of the keyword tree; rebuilt from keyWords on every getTagContent call.
    Branch frontbegin = null;
    Set<String> keyWords = new HashSet<String>();

    // "Whether a word was found". Not read or written by this class; kept so that
    // existing references to the field keep compiling.
    boolean findWord = false;

    /**
     * @param begin text inserted immediately before every matched keyword
     * @param end   text inserted immediately after every matched keyword
     */
    public TagWord(String begin, String end) {
        this.tagBegin = begin;
        this.tagEnd = end;
    }

    /**
     * Registers additional keywords. Each keyword is trimmed; {@code null} entries
     * and entries that are empty after trimming are ignored, as is a {@code null} array.
     *
     * @param keyWord keywords to add
     * @return this instance, to allow call chaining
     */
    public TagWord addKeyWords(String[] keyWord) {
        if (keyWord == null) {
            return this;
        }
        for (int i = 0; i < keyWord.length; i++) {
            if (keyWord[i] == null) {
                continue;
            }
            String trimmed = keyWord[i].trim();
            // An empty keyword can never match anything, so it is not worth storing.
            if (trimmed.length() > 0) {
                this.keyWords.add(trimmed);
            }
        }
        return this;
    }

    /**
     * Returns {@code content} with every keyword occurrence wrapped in the begin/end tags.
     *
     * <p>Matching is greedy and left to right. Text that is {@code null}, blank, or
     * processed while no keywords are registered is returned unchanged. Characters at
     * the end of the text that only form a prefix of a keyword are copied through, and
     * a keyword that ends exactly at the end of the text is tagged.
     *
     * @param content the text to tag
     * @return the tagged text
     */
    public String getTagContent(String content) {
        if (content == null || content.trim().length() == 0
                || keyWords.size() == 0) {
            return content;
        }
        this.frontbegin = new MakeLibrary().getStringTree(this.keyWords);
        if (frontbegin == null) {
            return content;
        }
        char[] chars = content.toCharArray();
        StringBuilder sb = new StringBuilder(chars.length + 16);

        int position = 0;
        while (position < chars.length) {
            int matched = longestMatchLength(chars, position);
            if (matched > 0) {
                sb.append(tagBegin).append(chars, position, matched).append(tagEnd);
                position += matched;
            } else {
                // No keyword starts here: copy one character and retry from the next one.
                sb.append(chars[position]);
                position++;
            }
        }
        return sb.toString();
    }

    /**
     * Returns the length of the longest keyword that starts at {@code from},
     * or 0 when no keyword starts there.
     */
    private int longestMatchLength(char[] chars, int from) {
        WoodInterface node = this.frontbegin;
        int longest = 0;
        for (int i = from; i < chars.length; i++) {
            node = node.get(chars[i]);
            if (node == null) {
                break;
            }
            int status = node.getStatus();
            if (status == STATUS_WORD_END) {
                // Nothing longer can follow, so stop immediately.
                return i - from + 1;
            }
            if (status == STATUS_WORD_CONTINUES) {
                // Remember this match but keep looking for a longer one.
                longest = i - from + 1;
            }
        }
        // Reached on a mismatch or at the end of the text; in both cases the longest
        // complete keyword seen so far (possibly none) is the answer.
        return longest;
    }

    /** Small demonstration: tags a sample sentence and prints the result and the elapsed milliseconds. */
    public static void main(String[] args) {
        String[] keyWords = {"中华人民共和国", "孙健", "伟大", "人民", "中华", "万岁"};
        long start = System.currentTimeMillis();
        String str = new TagWord("<begin>", "<end>").addKeyWords(keyWords)
                .getTagContent(
                        "中华人民共和国是一个伟大的民族我们有振兴民族的需要孙健万岁 . 中 国 万万岁哈哈 。");
        System.out.println(str);
        System.out.println(System.currentTimeMillis() - start);
    }

}

/**
 * Builds the character tree used for keyword matching.
 *
 * <p>Every keyword becomes a path of {@code Branch} nodes below a dummy root. The
 * last character of a keyword is inserted with status 3 and every other character
 * with status 1; {@code Branch.add} merges the status when a node for that
 * character already exists, which is where status 2 comes from.
 */
class MakeLibrary {

    // Iterator over the keyword set of the most recent getStringTree call.
    // Kept as a field only so existing references to it keep compiling.
    Iterator<String> it = null;

    public MakeLibrary() {
    }

    /**
     * Compiles the given keywords into a tree.
     *
     * @param keyWords the keywords; a {@code null} set and {@code null} entries are ignored
     * @return the dummy root node (character {@code 'h'}, status 0); never {@code null}
     */
    public Branch getStringTree(Set<String> keyWords) {
        Branch head = new Branch('h', 0, 0);
        if (keyWords == null) {
            return head;
        }

        it = keyWords.iterator();
        while (it.hasNext()) {
            String word = it.next();
            if (word == null) {
                continue;
            }
            Branch node = head;
            int lastIndex = word.length() - 1;
            for (int i = 0; i <= lastIndex; i++) {
                // 3 = a keyword ends on this character, 1 = the keyword continues.
                int status = (i == lastIndex) ? 3 : 1;
                // add() returns the node now stored for this character (the existing
                // one after merging, or the new one), so no second lookup is needed.
                node = (Branch) node.add(new Branch(word.charAt(i), status, 0));
            }
        }
        return head;
    }
}
/**
 * A node of the keyword character tree.
 *
 * <p>Status values used by the implementation in this file: 1 = the path continues,
 * 2 = a word ends here but longer words continue, 3 = a word ends here for certain.
 */
interface WoodInterface {

    /** Adds {@code branch} as a child and returns the child that ends up stored for its character. */
    WoodInterface add(WoodInterface branch);

    /** Returns the child node for character {@code c}, or {@code null} when there is none. */
    WoodInterface get(char c);

    /** Tells whether a child node for character {@code c} exists. */
    boolean contains(char c);

    /** Compares this node's character with {@code c}: negative, zero or positive. */
    int compareTo(char c);

    /** Tells whether this node's character is {@code c}. */
    boolean equals(char c);

    /** Returns the status of this node. */
    byte getStatus();

    /** Sets the status of this node. */
    void setStatus(int status);

    /** Returns the character this node stands for. */
    char getC();

    /** Returns the word-nature code of this node. */
    byte getNature();

    /** Sets the word-nature code of this node. */
    void setNature(byte nature);
}


class Branch implements WoodInterface {
/**
* status 此字的状态1,继续 2,是个词语但是还可以继续 ,3确定
* nature 词语性质
* 0.未知 . 1是姓 . 2 是职位名称 3 是数量级的词 . 4 是数字词语 5 是标点
*/
WoodInterface[] branches = new WoodInterface[0];
private char c;
// 状态
private byte status = 1;
// 索引
private short index = -1;
// 词性
private byte nature = 0;
// 单独查找出来的对象
WoodInterface branch = null;

public WoodInterface add(WoodInterface branch) {
if ((this.branch=this.get(branch.getC()))!=null) {
switch (branch.getStatus()) {
case 1:
if(this.branch.getStatus()==2){
this.branch.setStatus(2) ;
}
if(this.branch.getStatus()==3){
this.branch.setStatus(2) ;
}
break;
case 2:
this.branch.setStatus(2) ;
case 3:
if(this.branch.getStatus()==2){
this.branch.setStatus(2) ;
}
if(this.branch.getStatus()==1){
this.branch.setStatus(2) ;
}
}
this.branch.setNature(branch.getNature()) ;
return this.branch;
}
index++;
if ((index + 1) > branches.length) {
branches = Arrays.copyOf(branches, index + 1);
}
branches[index] = branch;
AnsjArrays.sort(branches);
return branch;
}

public Branch(char c, int status, int nature) {
this.c = c;
this.status = (byte) status;
this.nature = (byte) nature;
}

int i = 0;

public WoodInterface get(char c) {
int i = AnsjArrays.binarySearch(branches, c);
if (i > -1) {
return branches[i];
}
return null;
}

public boolean contains(char c) {
if (AnsjArrays.binarySearch(branches, c) > -1) {
return true;
} else {
return false;
}
}

public int compareTo(char c) {
if (this.c > c) {
return 1;
}else if (this.c < c) {
return -1;
}else
return 0 ;
}

public boolean equals(char c) {
if (this.c == c) {
return true;
} else {
return false;
}
}

@Override
public int hashCode() {
// TODO Auto-generated method stub
return c;
}

public byte getStatus() {
return status;
}

public void setStatus(int status) {
this.status = (byte) status;
}

public char getC() {
return this.c;
}

public byte getNature() {
return nature;
}

public void setNature(byte nature) {
this.nature = nature;
}

}
/**
 * Array helpers for {@code WoodInterface} nodes: binary search and merge sort keyed
 * on the node character, plus generic range-copy helpers.
 */
class AnsjArrays {
    // Ranges shorter than this are sorted by insertion sort instead of being split further.
    private static final int INSERTIONSORT_THRESHOLD = 7;

    /**
     * Binary search adapted from the JDK, with the autoboxing removed.
     *
     * @param branches nodes sorted by character
     * @param c        the character to look for
     * @return the index of the node for {@code c}, or -1 when there is none
     */
    public static int binarySearch(WoodInterface[] branches, char c) {
        if (branches.length == 0) {
            return -1;
        }
        int lo = 0;
        int hi = branches.length - 1;
        while (lo <= hi) {
            int middle = (lo + hi) >>> 1;
            int order = branches[middle].compareTo(c);
            if (order == 0) {
                return middle; // key found
            }
            if (order < 0) {
                lo = middle + 1;
            } else {
                hi = middle - 1;
            }
        }
        return -1; // key not found
    }

    /** Sorts the whole array in place by node character. */
    public static void sort(WoodInterface[] a) {
        WoodInterface[] scratch = a.clone();
        mergeSort(scratch, a, 0, a.length, 0);
    }

    /**
     * Sorts the range {@code [fromIndex, toIndex)} of the array in place by node character.
     *
     * @throws IllegalArgumentException       if {@code fromIndex > toIndex}
     * @throws ArrayIndexOutOfBoundsException if the range leaves the array
     */
    public static void sort(WoodInterface[] a, int fromIndex, int toIndex) {
        rangeCheck(a.length, fromIndex, toIndex);
        WoodInterface[] scratch = copyOfRange(a, fromIndex, toIndex);
        mergeSort(scratch, a, fromIndex, toIndex, -fromIndex);
    }

    /** Validates that {@code [fromIndex, toIndex)} is a proper range of an array of length {@code arrayLen}. */
    private static void rangeCheck(int arrayLen, int fromIndex, int toIndex) {
        if (fromIndex > toIndex) {
            throw new IllegalArgumentException("fromIndex(" + fromIndex
                    + ") > toIndex(" + toIndex + ")");
        }
        if (fromIndex < 0) {
            throw new ArrayIndexOutOfBoundsException(fromIndex);
        }
        if (toIndex > arrayLen) {
            throw new ArrayIndexOutOfBoundsException(toIndex);
        }
    }

    /**
     * Merge sort of {@code dest[low, high)}. {@code src} must hold the same elements
     * as {@code dest} in the corresponding range, shifted by {@code off}.
     */
    private static void mergeSort(WoodInterface[] src, WoodInterface[] dest, int low,
            int high, int off) {
        final int length = high - low;

        // Insertion sort on the smallest ranges.
        if (length < INSERTIONSORT_THRESHOLD) {
            for (int i = low; i < high; i++) {
                int j = i;
                while (j > low && dest[j - 1].compareTo(dest[j].getC()) > 0) {
                    swap(dest, j, j - 1);
                    j--;
                }
            }
            return;
        }

        // Recursively sort both halves of dest into src.
        final int destLow = low;
        final int destHigh = high;
        final int srcLow = low + off;
        final int srcHigh = high + off;
        final int mid = (srcLow + srcHigh) >>> 1;
        mergeSort(dest, src, srcLow, mid, -off);
        mergeSort(dest, src, mid, srcHigh, -off);

        // Both halves already in order relative to each other: a plain copy is enough.
        if (src[mid - 1].compareTo(src[mid].getC()) <= 0) {
            System.arraycopy(src, srcLow, dest, destLow, length);
            return;
        }

        // Merge the sorted halves (now in src) into dest.
        int left = srcLow;
        int right = mid;
        for (int i = destLow; i < destHigh; i++) {
            boolean takeLeft = right >= srcHigh
                    || (left < mid && src[left].compareTo(src[right].getC()) <= 0);
            if (takeLeft) {
                dest[i] = src[left++];
            } else {
                dest[i] = src[right++];
            }
        }
    }

    /** Swaps {@code nodes[first]} with {@code nodes[second]}. */
    private static void swap(WoodInterface[] nodes, int first, int second) {
        WoodInterface held = nodes[first];
        nodes[first] = nodes[second];
        nodes[second] = held;
    }

    /** Copies {@code original[from, to)} into a new array of the same runtime type. */
    @SuppressWarnings("unchecked")
    public static <T> T[] copyOfRange(T[] original, int from, int to) {
        return copyOfRange(original, from, to, (Class<T[]>) original.getClass());
    }

    /**
     * Copies {@code original[from, to)} into a new array of type {@code newType}.
     * Positions past the end of {@code original} are left {@code null}.
     *
     * @throws IllegalArgumentException if {@code from > to}
     */
    @SuppressWarnings("unchecked")
    public static <T, U> T[] copyOfRange(U[] original, int from, int to,
            Class<? extends T[]> newType) {
        int newLength = to - from;
        if (newLength < 0) {
            throw new IllegalArgumentException(from + " > " + to);
        }
        T[] copy;
        if ((Object) newType == (Object) Object[].class) {
            copy = (T[]) new Object[newLength];
        } else {
            copy = (T[]) Array.newInstance(newType.getComponentType(), newLength);
        }
        int available = original.length - from;
        System.arraycopy(original, from, copy, 0, Math.min(available, newLength));
        return copy;
    }
}
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值