Create an inverted index with given documents.
这是一道字符串相关的题目,实际上并不难。主要的问题是正则表达式的使用:对于 Java 来说,可以直接使用 String 的 split("\\s+") 方法进行切分;在 Python 中则需要导入 re 模块。
java
/**
* Definition of Document:
* class Document {
* public int id;
* public String content;
* }
*/
public class Solution {
    /**
     * Builds an inverted index: a map from each word to the ids of the
     * documents whose content contains that word.
     *
     * <p>Content is split on runs of whitespace. Documents are processed in
     * list order, so each id list follows the order of {@code docs}. A word
     * occurring several times in one document contributes that document's id
     * only once. Blank content contributes no entries.
     *
     * @param docs a list of documents; {@code null} is treated as empty, and
     *             {@code null} documents or documents with {@code null}
     *             content are skipped
     * @return an inverted index (never {@code null})
     */
    public Map<String, List<Integer>> invertedIndex(List<Document> docs) {
        Map<String, List<Integer>> index = new HashMap<>();
        if (docs == null) {
            return index;
        }
        for (Document doc : docs) {
            if (doc == null || doc.content == null) {
                continue;
            }
            for (String word : doc.content.trim().split("\\s+")) {
                // Splitting an empty string yields a single "" element;
                // it must not be indexed as if it were a word.
                if (!word.isEmpty()) {
                    util(index, word, doc.id);
                }
            }
        }
        return index;
    }

    /**
     * Records that the document {@code id} contains the word {@code str}.
     *
     * <p>Only the last recorded id is compared against {@code id}. This is
     * enough to suppress duplicates because the caller finishes one document
     * before starting the next.
     *
     * @param map the index being built
     * @param str the word to register
     * @param id  the id of the document containing the word
     */
    private void util(Map<String, List<Integer>> map,
                      String str,
                      int id) {
        List<Integer> ids = map.get(str);
        if (ids == null) {
            ids = new ArrayList<>();
            map.put(str, ids);
        }
        // Integer vs. int comparison unboxes, so this compares values.
        if (ids.isEmpty() || ids.get(ids.size() - 1) != id) {
            ids.add(id);
        }
    }
}
python
'''
Definition of Document
class Document:
def __init__(self, id, content):
self.id = id
self.content = content
'''
import re
class Solution:
    # @param {Document[]} docs a list of documents
    # @return {dict(string, int[])} an inverted index
    def invertedIndex(self, docs):
        """Build an inverted index mapping each word to the ids of the
        documents whose content contains it.

        Content is split on runs of whitespace. Documents are processed in
        list order, so each id list follows the order of ``docs``. A word
        repeated inside one document contributes that document's id once.
        Blank or ``None`` content contributes no entries; ``None`` for
        ``docs`` is treated as an empty list.
        """
        mapping = {}
        for doc in docs or []:
            content = (doc.content or '').strip()
            for word in re.split(r'\s+', content):
                # re.split on an empty string returns [''], which must not
                # be indexed as if it were a word.
                if word:
                    self.util(mapping, word, doc.id)
        return mapping

    def util(self, mapping, string, idd):
        """Record that document ``idd`` contains the word ``string``.

        Only the last recorded id is compared against ``idd``. This is
        enough to suppress duplicates because the caller finishes one
        document before starting the next.
        """
        ids = mapping.setdefault(string, [])
        if not ids or ids[-1] != idd:
            ids.append(idd)