最近在做用 Java 从语料中提取所需信息的方法（一次可提取多个信息），写了一个简单的基础类，供大家学习参考。
package com;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class NlpRegexTool {
    /**
     * Sentinel appended to both the pattern and the input. It forces a trailing
     * reluctant group such as {@code (.*?)} to consume the remainder of the
     * input instead of matching the empty string.
     */
    private static final String _END = "##!!&&&&";

    /**
     * Extracts several values from a sentence with a single regular expression
     * and returns them keyed by caller-supplied names.
     *
     * <p>Capturing group {@code i + 1} of {@code regxStr} is stored under
     * {@code codes.get(i)}. For example, matching {@code "from A to B by train"}
     * against {@code "from (.*?) to (.*?) by (.*?)"} with codes
     * {@code [from, to, how]} yields {@code {from=A, to=B, how=train}}.
     *
     * <p>The input is trimmed before matching. If there are more codes than
     * capturing groups, the surplus codes are left out of the result. A group
     * that did not participate in the match is stored with a {@code null} value.
     *
     * @param content    the text to analyse; {@code null} yields an empty map
     * @param regxStr    the regular expression containing the capturing groups;
     *                   {@code null} or a syntactically invalid expression
     *                   yields an empty map
     * @param codes      result keys, in capturing-group order; {@code null}
     *                   yields an empty map
     * @param groupCount not used by this method; kept so existing callers
     *                   continue to compile
     * @return a mutable map from code to captured text; empty when nothing
     *         matched or the arguments were unusable, never {@code null}
     */
    public static Map<String, String> matchMultiGroup(String content,
            String regxStr, List<String> codes, int groupCount) {
        Map<String, String> codeGroupMap = new HashMap<String, String>();
        if (content == null || regxStr == null || codes == null) {
            return codeGroupMap;
        }

        Pattern pattern;
        try {
            // Wrap the caller's expression in a non-capturing group so that the
            // sentinel applies to the whole expression, not just to the last
            // branch of a top-level alternation. Group numbering is unaffected.
            pattern = Pattern.compile("(?:" + regxStr + ")" + Pattern.quote(_END));
        } catch (java.util.regex.PatternSyntaxException e) {
            System.err.println("NlpRegexTool: invalid regular expression '"
                    + regxStr + "': " + e.getMessage());
            return codeGroupMap;
        }

        Matcher matcher = pattern.matcher(content.trim() + _END);
        if (matcher.find()) {
            // Never ask for a group the pattern does not define.
            int usable = Math.min(codes.size(), matcher.groupCount());
            for (int i = 0; i < usable; i++) {
                codeGroupMap.put(codes.get(i), matcher.group(i + 1));
            }
        }
        return codeGroupMap;
    }
}
测试方法:
package com;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
public class TestRegx {
/**
* @param args
*/
public static void main(String[] args) {
testJipiao();
testTianQi();
testLu();
}
public static void testJipiao(){
String content = "我要买北京到上海的机票";
String regxStr = "我要买(.*?)到(.*?)的(.*?)";
List<String> codes = new ArrayList<String>();
codes.add("fromAddress");
codes.add("toAddress");
codes.add("what");
Map<String, String> resultMap = NlpRegexTool.matchMultiGroup(content,
regxStr, codes, 3);
for (String key : resultMap.keySet()) {
System.out.println(key + ":" + resultMap.get(key));
}
System.out.println("----------------------------------");
}
public static void testTianQi(){
String content = "北京的天气怎么样";
String regxStr = "(.*?)的(.*?)怎么样";
List<String> codes = new ArrayList<String>();
codes.add("address");
codes.add("what");
Map<String, String> resultMap = NlpRegexTool.matchMultiGroup(content,
regxStr, codes, 3);
for (String key : resultMap.keySet()) {
System.out.println(key + ":" + resultMap.get(key));
}
}
public static void testLu(){
String content = "北京到纽约怎么去";
String regxStr = "(.*?)到(.*?)怎么(.*?)";
List<String> codes = new ArrayList<String>();
codes.add("fromAddress");
codes.add("toAddress");
codes.add("what");
Map<String, String> resultMap = NlpRegexTool.matchMultiGroup(content,
regxStr, codes, 3);
for (String key : resultMap.keySet()) {
System.out.println(key + ":" + resultMap.get(key));
}
}
}
上海一能信息科技出品