对若干行文字建立倒排索引(根据单词找到所在行号)。
然后根据查询关键字,在倒排索引中进行查找,找到包含所有这些关键字的行并输出。
输入说明
- 若干行英文,以 `!!!!!` 为结束。
- 之后每行输入一组查询关键字,关键字之间以1个空格为分隔
输出说明
- 输出所创建倒排索引。索引的key按照字母升序,索引的value按照行号升序
- 输出查询结果。如果找到,输出包含所查询关键字的行集(即,行集中每一行的内容);如果没找到,输出 `found 0 results`
输入样例
where are you from are you ok
this is a test
that is an apple
there are lots of apples you eat it
who are you
!!!!!
you are
eat
you test
abc
输出样例
a=[2]
an=[3]
apple=[3]
apples=[4]
are=[1, 4, 5]
eat=[4]
from=[1]
is=[2, 3]
it=[4]
lots=[4]
of=[4]
ok=[1]
test=[2]
that=[3]
there=[4]
this=[2]
where=[1]
who=[5]
you=[1, 4, 5]
[1, 4, 5]
line 1:where are you from are you ok
line 4:there are lots of apples you eat it
line 5:who are you
[4]
line 4:there are lots of apples you eat it
found 0 results
found 0 results
代码:
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.TreeMap;
/**
 * One entry of the inverted index: a word together with the numbers of the
 * lines on which it occurs, in the order they were recorded.
 */
class Info {
    /** Line numbers recorded for {@link #word}. */
    public ArrayList<Integer> nums;

    /** The indexed word itself. */
    public String word;

    /**
     * Creates an entry for {@code word} whose first recorded line is {@code nums}.
     *
     * @param nums line number of the first occurrence
     * @param word the word being indexed
     */
    public Info(int nums, String word) {
        ArrayList<Integer> firstOccurrence = new ArrayList<>();
        firstOccurrence.add(nums);
        this.word = word;
        this.nums = firstOccurrence;
    }

    /**
     * Appends another line number to this entry.
     *
     * @param nums line number to record
     */
    public void addWord(int nums) {
        this.nums.add(nums);
    }

    /** Renders the entry as {@code word=[n1, n2, ...]}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(word).append("=").append(nums);
        return text.toString();
    }

    /**
     * Tells whether {@code count} differs from the most recently recorded line
     * number, i.e. whether recording it would not repeat the last entry.
     *
     * @param count candidate line number
     * @return {@code true} when {@code count} is not equal to the last recorded line number
     */
    public boolean check(int count) {
        int lastPosition = this.nums.size() - 1;
        int lastRecorded = this.nums.get(lastPosition);
        return lastRecorded != count;
    }
}
/**
 * Builds an inverted index (word -> ascending line numbers) over text read from
 * standard input and answers keyword queries against that index.
 *
 * <p>Input: text lines terminated by a line equal to {@code !!!!!}, followed by
 * query lines whose keywords are separated by whitespace.
 *
 * <p>Output: the index, one {@code word=[n1, n2, ...]} entry per line in ascending
 * word order; then, for every non-blank query, either the list of matching line
 * numbers followed by {@code line N:<text>} for each match, or
 * {@code found 0 results}.
 */
public class Main {
    /** Sentinel line that separates the indexed text from the queries. */
    private static final String END_OF_TEXT = "!!!!!";

    /**
     * Reads the text and the queries from standard input and prints the index
     * and the query results to standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Scanner cin = new Scanner(System.in);
        List<String> lines = new ArrayList<>();
        // TreeMap iterates its keys in natural String order, which is the
        // required output order, so no separate sort is needed.
        Map<String, List<Integer>> index = new TreeMap<>();

        // hasNextLine() keeps the loop from throwing when the input ends
        // without the sentinel line.
        while (cin.hasNextLine()) {
            String line = cin.nextLine();
            if (line.equals(END_OF_TEXT)) {
                break;
            }
            lines.add(line);
            // Line numbers are 1-based: the line just added has number lines.size().
            addToIndex(index, line, lines.size());
        }

        for (Map.Entry<String, List<Integer>> entry : index.entrySet()) {
            System.out.println(entry.getKey() + "=" + entry.getValue());
        }

        while (cin.hasNextLine()) {
            List<String> keywords = tokenize(cin.nextLine());
            if (keywords.isEmpty()) {
                // A blank line carries no keywords, so there is nothing to look up.
                continue;
            }
            List<Integer> hits = search(index, keywords);
            if (hits.isEmpty()) {
                System.out.println("found 0 results");
                continue;
            }
            System.out.println(hits);
            for (int lineNumber : hits) {
                System.out.println("line " + lineNumber + ":" + lines.get(lineNumber - 1));
            }
        }
    }

    /** Splits {@code text} on runs of whitespace and drops empty tokens. */
    private static List<String> tokenize(String text) {
        List<String> words = new ArrayList<>();
        for (String token : text.split("\\s+")) {
            if (!token.isEmpty()) {
                words.add(token);
            }
        }
        return words;
    }

    /** Records every word of {@code line} under {@code lineNumber}, at most once per line. */
    private static void addToIndex(Map<String, List<Integer>> index, String line, int lineNumber) {
        for (String word : tokenize(line)) {
            List<Integer> posting = index.computeIfAbsent(word, key -> new ArrayList<>());
            // Lines are indexed in ascending order, so a repeat of the word on the
            // same line can only collide with the last recorded number.
            if (posting.isEmpty() || posting.get(posting.size() - 1) != lineNumber) {
                posting.add(lineNumber);
            }
        }
    }

    /**
     * Returns the ascending line numbers that contain every keyword as a whole
     * word, or an empty list when any keyword is absent from the index.
     */
    private static List<Integer> search(Map<String, List<Integer>> index, List<String> keywords) {
        List<Integer> result = null;
        for (String keyword : keywords) {
            List<Integer> posting = index.get(keyword);
            if (posting == null) {
                return new ArrayList<>();
            }
            if (result == null) {
                // Copy so the intersection below never mutates the index itself.
                result = new ArrayList<>(posting);
            } else {
                result.retainAll(posting);
            }
            if (result.isEmpty()) {
                break;
            }
        }
        return result == null ? new ArrayList<>() : result;
    }
}