主要实现将英文文本拆分为单词,并调用有道词典查询接口查询其中重点名词、动词、形容词、副词的功能。对重点单词的界定还有待商榷:这里只是简单地将长度小于5个字符的单词去除,但实际应用中显然不能只按长度来判断。
package com.utopian.thankyou.end;
import java.io.IOException;
import java.net.URLEncoder;
import java.util.List;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
/**
 * Splits English text into words, looks the longer words up through the
 * YouDao translation HTTP API, and (in {@link #main}) groups the results by
 * part of speech.
 *
 * Created by Mrs.Utopian on 2019/1/3.
 */
public class CommonUtil {

    /** Words shorter than this many characters are treated as "simple" and are not looked up. */
    private static final int MIN_WORD_LENGTH = 5;

    /** One or more word separators: whitespace, comma, period or semicolon. */
    private static final String DELIMITERS = "[\\s,.;]+";

    // NOTE(review): the request goes over plain HTTP and the API key is
    // hard-coded in source; consider HTTPS and external configuration.
    private static final String YOUDAO_QUERY_URL =
            "http://fanyi.youdao.com/openapi.do?keyfrom=youdao111&key=60638690&type=data&doctype=xml&version=1.1&q=";

    /**
     * Splits a string into words on whitespace, commas, periods and semicolons.
     * Each word is also echoed to standard output, one per line.
     *
     * @param str the text to split; may be {@code null}
     * @return the words in order of appearance, never containing empty strings;
     *         an empty array when {@code str} is {@code null} or holds no words
     */
    public String[] wordArray(String str) {
        if (str == null) {
            return new String[0];
        }
        // String.split keeps a leading empty token when the input starts with a
        // delimiter, so strip leading delimiters first (trailing empties are
        // already dropped by split).
        String text = str.replaceFirst("^" + DELIMITERS, "");
        if (text.length() == 0) {
            return new String[0];
        }
        String[] tokens = text.split(DELIMITERS);
        for (String token : tokens) {
            System.out.println(token);
        }
        return tokens;
    }

    /**
     * Looks up the meaning of every word that is at least
     * {@value #MIN_WORD_LENGTH} characters long.
     *
     * <p>Results are packed from index 0 as {@code "word:meaning"}; the array
     * has the same length as {@code words}, so unused trailing slots are
     * {@code null}. If a request or the XML parsing fails, the stack trace is
     * printed and the lookup stops; entries collected so far are returned.
     *
     * @param words the candidate words; {@code null} elements are skipped
     * @return the meanings found, followed by {@code null} padding; an empty
     *         array when {@code words} is {@code null}
     */
    public static String[] getMeansByYouDao(String[] words) {
        if (words == null) {
            return new String[0];
        }
        String[] means = new String[words.length];
        int meansCount = 0; // number of slots of 'means' filled so far
        CloseableHttpClient client = HttpClients.createDefault();
        try {
            for (String word : words) {
                if (word == null || word.length() < MIN_WORD_LENGTH) {
                    continue; // simple word: not looked up
                }
                // Encode the word so characters such as '&', '#', '%' or
                // spaces cannot corrupt the query string.
                HttpGet get = new HttpGet(YOUDAO_QUERY_URL + URLEncoder.encode(word, "UTF-8"));
                HttpResponse response = client.execute(get);
                HttpEntity entity = response.getEntity();
                if (entity == null) {
                    continue; // no body to parse
                }
                String xml = EntityUtils.toString(entity, "UTF-8");
                Element root = DocumentHelper.parseText(xml).getRootElement();
                String meaning = extractMeaning(word, root);
                if (meaning != null) {
                    means[meansCount++] = meaning;
                }
            }
        } catch (IOException e) {
            // Covers ClientProtocolException and UnsupportedEncodingException too.
            e.printStackTrace();
        } catch (DocumentException e) {
            e.printStackTrace();
        } finally {
            try {
                client.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return means;
    }

    /**
     * Builds the {@code "word:meaning"} entry from a parsed response.
     * Uses the children of {@code basic/explains} when the root has more than
     * three children and that element exists; otherwise falls back to
     * {@code translation/paragraph}.
     *
     * @return the entry, or {@code null} when the response contains neither element
     */
    private static String extractMeaning(String word, Element root) {
        Element explains = null;
        // More than three children is taken to mean a dictionary entry
        // rather than a plain translation.
        if (root.elements().size() > 3) {
            Element basic = root.element("basic");
            if (basic != null) {
                explains = basic.element("explains");
            }
        }
        if (explains != null) {
            StringBuilder entry = new StringBuilder(word).append(':');
            for (Object child : explains.elements()) {
                entry.append(((Element) child).getText());
            }
            return entry.toString();
        }
        Element translation = root.element("translation");
        Element paragraph = translation == null ? null : translation.element("paragraph");
        return paragraph == null ? null : word + ":" + paragraph.getText();
    }

    /**
     * Tells whether the meaning part of a {@code "word:meaning"} entry contains
     * the given part-of-speech tag as a whole abbreviation, i.e. not directly
     * preceded by an ASCII letter (so "v." does not match inside "adv." and
     * "n." does not match inside "pron.").
     */
    private static boolean hasPosTag(String entry, String tag) {
        int start = entry.indexOf(':') + 1; // 0 when there is no colon
        int at = entry.indexOf(tag, start);
        while (at >= 0) {
            if (at == start || !isAsciiLetter(entry.charAt(at - 1))) {
                return true;
            }
            at = entry.indexOf(tag, at + 1);
        }
        return false;
    }

    /** Returns true for 'a'-'z' and 'A'-'Z'. */
    private static boolean isAsciiLetter(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    /**
     * Demo: splits a sample sentence, looks up its longer words and prints the
     * entries grouped into nouns, verbs, and adjectives/adverbs.
     */
    public static void main(String args[]) {
        CommonUtil commonUtil = new CommonUtil();
        String text = "Cherish all your happy moments; they make a fine cushion for old age."; // source text
        String words[] = commonUtil.wordArray(text);
        String means[] = getMeansByYouDao(words);
        String nouns[] = new String[means.length]; // nouns
        String verbs[] = new String[means.length]; // verbs
        String ads[] = new String[means.length];   // adjectives or adverbs
        int nounsCount = 0;
        int verbsCount = 0;
        int adsCount = 0;
        // Entries are packed from index 0, so the first null marks the end.
        for (int i = 0; i < means.length && means[i] != null; i++) {
            // Sort into nouns (n.), verbs (vt./vi./v.) and adjectives/adverbs (adj./adv.);
            // an entry may fall into several groups.
            String entry = means[i];
            if (hasPosTag(entry, "n.")) {
                nouns[nounsCount++] = entry;
            }
            if (hasPosTag(entry, "vt.") || hasPosTag(entry, "vi.") || hasPosTag(entry, "v.")) {
                verbs[verbsCount++] = entry;
            }
            if (hasPosTag(entry, "adj.") || hasPosTag(entry, "adv.")) {
                ads[adsCount++] = entry;
            }
        }
        System.out.println("原文" + text);
        System.out.println("输出所有的名词");
        for (int i = 0; i < nounsCount; i++) {
            System.out.println(nouns[i]);
        }
        System.out.println("输出所有的动词");
        for (int i = 0; i < verbsCount; i++) {
            System.out.println(verbs[i]);
        }
        System.out.println("输出所有的形容词、副词");
        for (int i = 0; i < adsCount; i++) {
            System.out.println(ads[i]);
        }
    }
}
输出的结果如下: