一、需求
实现英文单词的单数变复数,复数变单数形式,如orange -> oranges,oranges -> orange。
二、API功能
1、单数 -> 复数
String plural = EnglishWordUtil.pluralize("map");
2、是否为单数
boolean isSingular = EnglishWordUtil.isSingular("map");
3、复数 -> 单数
String singular = EnglishWordUtil.singularize("maps");
4、是否为复数
boolean isPlural = EnglishWordUtil.isPlural("maps");
三、支持的单复数转换类型
0、单复数同形
deer,shoes,sheep,Chinese,news…
1、单数复数的不规则变化
person,man,child,foot…
2、直接加"-s"的x国人
German,Russian,American…
3、以s, ss,x, ch, sh结尾的名词加"-es"
bus,class,watch…
4、以元音字母+o结尾(如studio),后面加"-s"
zoo,kangaroo…
5、以辅音字母+o结尾(除piano,kilo,photo),后面加"-es"
hero,potato,tomato…
6、以辅音字母加y结尾的名词,变y为i加"-es"
city,factory,family…
7、以元音字母加y结尾的名词直接加"-s"
boy,toy,monkey…
8、除了roof,gulf,proof,beef,staff,belief,cliff,以fe或f结尾的名词,把fe或f变为v加"-es"
knife,wolf,life,wife…
9、无连字号复合名词,后面名词变复数
mooncake,gentleman,housewife…
10、普通名词,加s
map,orange…
四、代码(Java正则实现)
工具类:
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
// Holds a compiled regular expression together with the replacement string
// that is meant to be substituted for whatever the expression matches.
class PatternAndReplacement {
    // Expression used to recognise a word form.
    private Pattern pattern;
    // Replacement text; may contain group references such as "$1".
    private String replacement;

    // Creates a holder for the given pattern/replacement pair.
    public PatternAndReplacement(Pattern pattern, String replacement) {
        this.replacement = replacement;
        this.pattern = pattern;
    }

    // ---- read access ----

    public Pattern getPattern() {
        return this.pattern;
    }

    public String getReplacement() {
        return this.replacement;
    }

    // ---- write access ----

    public void setPattern(Pattern pattern) {
        this.pattern = pattern;
    }

    public void setReplacement(String replacement) {
        this.replacement = replacement;
    }
}
// Converts English nouns between their singular and plural forms.
//
// Every conversion consults, in order: the list of words whose singular and
// plural are identical, a table of irregular forms, a table of suffix rules,
// and finally a default ("add s" / "drop the trailing s"). Within a table the
// first matching rule wins, so exceptions are listed before the general rule
// they override. All matching is case-insensitive.
//
// The class works purely on spelling. isSingular/isPlural are therefore
// heuristics, and singularize assumes that its argument really is a plural.
public class EnglishWordUtil {

    // Immutable pairing of a case-insensitive pattern with the replacement
    // template applied to the text it matches.
    private static final class Rule {
        private final Pattern pattern;
        private final String replacement;

        Rule(String regex, String replacement) {
            this.pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
            this.replacement = replacement;
        }

        // True when the pattern occurs in the word.
        boolean matches(String word) {
            return pattern.matcher(word).find();
        }

        // Returns the rewritten word, or null when the pattern does not match.
        String apply(String word) {
            Matcher matcher = pattern.matcher(word);
            return matcher.find() ? matcher.replaceAll(replacement) : null;
        }
    }

    // Words returned unchanged by both conversions and reported as both
    // singular and plural.
    // NOTE(review): "human" normally pluralizes to "humans"; it is kept in this
    // list as in the original, presumably to keep the "-man" rule away from it.
    private static final String[] UNCOUNTABLES = {
        "deer", "shoes", "sheep", "fish", "Chinese", "English", "Portuguese",
        "Lebanese", "Swiss", "Vietnamese", "Japanese", "economics", "news", "human"
    };

    // Irregular singular -> plural forms. Every pattern is anchored at both
    // ends so that it can never rewrite the middle of an unrelated word.
    private static final Rule[] IRREGULAR_PLURALS = {
        new Rule("^person$", "people"),
        // Compounds ending in "man" (woman, gentleman, ...); "German" takes a plain "s".
        new Rule("^(?!german$)(.+)man$", "$1men"),
        new Rule("^man$", "men"),
        new Rule("^child$", "children"),
        new Rule("^foot$", "feet"),
        new Rule("^tooth$", "teeth"),
        new Rule("^(m|l)ouse$", "$1ice"),
        new Rule("^matrix$", "matrices"),
        new Rule("^vertex$", "vertices"),
        new Rule("^ox$", "oxen"),
        new Rule("^goose$", "geese"),
        new Rule("^basis$", "bases")
    };

    // Suffix rules for singular -> plural. Anything that matches none of them
    // simply gets "s" appended, which already covers vowel + "o" (zoo, studio),
    // vowel + "y" (boy), nationalities such as "Russian", and closed compounds
    // whose last component is regular (mooncake, armchair, greenhouse).
    private static final Rule[] REGULAR_PLURALS = {
        // Sibilant endings take "es": bus, class, box, watch, brush.
        new Rule("(s|x|ch|sh)$", "$1es"),
        // Consonant + "o" takes "es" (hero, potato) except for these words.
        new Rule("^(pian|kil|phot)o$", "$1os"),
        new Rule("([^aeiou])o$", "$1oes"),
        // Consonant + "y" becomes "ies": city, family.
        new Rule("([^aeiou])y$", "$1ies"),
        // "f"/"fe" becomes "ves" (wolf, knife) except for these words and for
        // anything ending in "ff" (staff, cliff, cuff).
        new Rule("^(roo|gul|proo|bee|belie)f$", "$1fs"),
        new Rule("(ff)$", "$1s"),
        new Rule("(fe|f)$", "ves")
    };

    // Irregular plural -> singular forms, mirroring IRREGULAR_PLURALS.
    private static final Rule[] IRREGULAR_SINGULARS = {
        new Rule("^people$", "person"),
        new Rule("^(.+)men$", "$1man"),
        new Rule("^men$", "man"),
        new Rule("^children$", "child"),
        new Rule("^feet$", "foot"),
        new Rule("^teeth$", "tooth"),
        new Rule("^(m|l)ice$", "$1ouse"),
        new Rule("^matrices$", "matrix"),
        new Rule("^vertices$", "vertex"),
        new Rule("^oxen$", "ox"),
        new Rule("^geese$", "goose"),
        new Rule("^bases$", "basis")
    };

    // Suffix rules for plural -> singular. Anything that matches none of them
    // just loses its trailing "s" (zoos, boys, roofs, pianos, Germans, ...).
    private static final Rule[] REGULAR_SINGULARS = {
        // Must precede the generic rules below: "-wives" is "-wife", not "-wif",
        // and "-houses" is "-house", not "-hous".
        new Rule("wives$", "wife"),
        new Rule("houses$", "house"),
        new Rule("^(kni|li)ves$", "$1fe"),
        // buses, classes, boxes, watches, brushes.
        new Rule("(s|x|ch|sh)es$", "$1"),
        // heroes, potatoes.
        new Rule("([^aeiou])oes$", "$1o"),
        // cities, families.
        new Rule("([^aeiou])ies$", "$1y"),
        // wolves, leaves.
        new Rule("ves$", "f")
    };

    // A trailing "s" that singularize may drop; words ending in "ss" keep it.
    private static final Pattern TRAILING_S =
            Pattern.compile("[^s]s$", Pattern.CASE_INSENSITIVE);

    // Spelling heuristic for "looks like a regular plural": ends in "s" but not
    // in "ss", "us" or "is" (class, bus, analysis are treated as singular).
    private static final Pattern PLURAL_SUFFIX =
            Pattern.compile("[^sui]s$", Pattern.CASE_INSENSITIVE);

    // Reports whether the word looks like a singular noun.
    // Returns false for null or empty input and true for words whose singular
    // and plural are identical. Otherwise the answer is a spelling heuristic:
    // known irregular plurals and words with a regular plural ending are
    // rejected, everything else is accepted.
    public static boolean isSingular(String word) {
        if (word == null || word.isEmpty()) {
            return false;
        }
        if (isUncountable(word)) {
            return true;
        }
        if (matchesAny(IRREGULAR_SINGULARS, word)) {
            // e.g. "people", "children": a known plural form.
            return false;
        }
        if (matchesAny(IRREGULAR_PLURALS, word)) {
            // e.g. "basis", "woman": a known singular form.
            return true;
        }
        return !PLURAL_SUFFIX.matcher(word).find();
    }

    // Returns the plural of a singular noun.
    // Null or empty input yields ""; words whose singular and plural are
    // identical are returned unchanged.
    public static String pluralize(String word) {
        if (word == null || word.isEmpty()) {
            return "";
        }
        if (isUncountable(word)) {
            return word;
        }
        String irregular = applyFirst(IRREGULAR_PLURALS, word);
        if (irregular != null) {
            return irregular;
        }
        String regular = applyFirst(REGULAR_PLURALS, word);
        return regular != null ? regular : word + "s";
    }

    // Reports whether the word looks like a plural noun.
    // Returns false for null or empty input and true for words whose singular
    // and plural are identical. Otherwise the answer is a spelling heuristic:
    // known irregular plurals and words with a regular plural ending are
    // accepted, everything else is rejected.
    public static boolean isPlural(String word) {
        if (word == null || word.isEmpty()) {
            return false;
        }
        if (isUncountable(word)) {
            return true;
        }
        if (matchesAny(IRREGULAR_PLURALS, word)) {
            // e.g. "basis": ends in "s" but is a known singular form.
            return false;
        }
        if (matchesAny(IRREGULAR_SINGULARS, word)) {
            return true;
        }
        return PLURAL_SUFFIX.matcher(word).find();
    }

    // Returns the singular of a plural noun.
    // Null or empty input yields ""; words whose singular and plural are
    // identical are returned unchanged. A word that matches no rule and has no
    // trailing "s" to drop (or ends in "ss") is returned as it is.
    public static String singularize(String word) {
        if (word == null || word.isEmpty()) {
            return "";
        }
        if (isUncountable(word)) {
            return word;
        }
        String irregular = applyFirst(IRREGULAR_SINGULARS, word);
        if (irregular != null) {
            return irregular;
        }
        String regular = applyFirst(REGULAR_SINGULARS, word);
        if (regular != null) {
            return regular;
        }
        if (TRAILING_S.matcher(word).find()) {
            return word.substring(0, word.length() - 1);
        }
        return word;
    }

    // True when the word is listed in UNCOUNTABLES, ignoring case.
    private static boolean isUncountable(String word) {
        for (String uncountable : UNCOUNTABLES) {
            if (uncountable.equalsIgnoreCase(word)) {
                return true;
            }
        }
        return false;
    }

    // True when at least one rule of the table matches the word.
    private static boolean matchesAny(Rule[] rules, String word) {
        for (Rule rule : rules) {
            if (rule.matches(word)) {
                return true;
            }
        }
        return false;
    }

    // Applies the first matching rule of the table; null when none matches.
    private static String applyFirst(Rule[] rules, String word) {
        for (Rule rule : rules) {
            String rewritten = rule.apply(word);
            if (rewritten != null) {
                return rewritten;
            }
        }
        return null;
    }
}
五、测试
import java.util.HashMap;
// Console driver that runs EnglishWordUtil over a table of singular/plural
// pairs and prints one line per check (stdout on success, stderr on failure).
public class EnglishWordUtilTest {
    public static void main(String[] args) {
        // Category number -> (singular -> plural) samples for that category.
        HashMap<Integer, HashMap<String, String>> samples = new HashMap<>();
        // 0: singular and plural are identical
        samples.put(0, pairs(
                "deer", "deer",
                "shoes", "shoes",
                "sheep", "sheep",
                "fish", "fish",
                "Chinese", "Chinese",
                "English", "English",
                "Portuguese", "Portuguese",
                "Lebanese", "Lebanese",
                "Swiss", "Swiss",
                "Vietnamese", "Vietnamese",
                "Japanese", "Japanese",
                "economics", "economics",
                "news", "news",
                "human", "human"));
        // 1: irregular forms
        samples.put(1, pairs(
                "person", "people",
                "man", "men",
                "child", "children",
                "foot", "feet",
                "tooth", "teeth",
                "mouse", "mice",
                "louse", "lice",
                "matrix", "matrices",
                "vertex", "vertices",
                "ox", "oxen",
                "goose", "geese",
                "basis", "bases"));
        // 2: nationalities that simply take "-s"
        samples.put(2, pairs(
                "German", "Germans",
                "Russian", "Russians",
                "American", "Americans",
                "Italian", "Italians",
                "Indian", "Indians",
                "Canadian", "Canadians",
                "Australian", "Australians",
                "Swede", "Swedes"));
        // 3: endings s, x, ch, sh take "-es"
        samples.put(3, pairs(
                "bus", "buses",
                "class", "classes",
                "box", "boxes",
                "sex", "sexes",
                "watch", "watches",
                "brush", "brushes"));
        // 4: words ending in "o" that take "-s"
        samples.put(4, pairs(
                "zoo", "zoos",
                "photo", "photos",
                "kangaroo", "kangaroos",
                "bamboo", "bamboos",
                "radio", "radios",
                "piano", "pianos",
                "video", "videos",
                "kilo", "kilos",
                "studio", "studios"));
        // 5: consonant + "o" takes "-es"
        samples.put(5, pairs(
                "hero", "heroes",
                "Negro", "Negroes",
                "tomato", "tomatoes",
                "potato", "potatoes",
                "mango", "mangoes"));
        // 6: consonant + "y" becomes "-ies"
        samples.put(6, pairs(
                "baby", "babies",
                "city", "cities",
                "factory", "factories",
                "family", "families"));
        // 7: vowel + "y" takes "-s"
        samples.put(7, pairs(
                "boy", "boys",
                "toy", "toys",
                "monkey", "monkeys"));
        // 8: "f"/"fe" becomes "-ves", apart from the listed exceptions
        samples.put(8, pairs(
                "roof", "roofs",
                "gulf", "gulfs",
                "proof", "proofs",
                "beef", "beefs",
                "staff", "staffs",
                "belief", "beliefs",
                "cliff", "cliffs",
                "knife", "knives",
                "wife", "wives",
                "wolf", "wolves",
                "life", "lives"));
        // 9: closed compounds pluralize their last component
        samples.put(9, pairs(
                "mooncake", "mooncakes",
                "stopwatch", "stopwatches",
                "armchair", "armchairs",
                "gentleman", "gentlemen",
                "housewife", "housewives",
                "wineglass", "wineglasses",
                "greenhouse", "greenhouses"));
        // 10: ordinary nouns take "-s"
        samples.put(10, pairs(
                "map", "maps",
                "orange", "oranges",
                "user", "users",
                "German", "Germans",
                "Russian", "Russians",
                "American", "Americans",
                "Italian", "Italians",
                "Indian", "Indians",
                "Canadian", "Canadians",
                "Australian", "Australians",
                "Swede", "Swedes"));

        // Each check runs over every category before the next check starts.
        int categories = samples.size();
        for (int order = 0; order < categories; order++) {
            testIsSingular(samples.get(order), order);
        }
        for (int order = 0; order < categories; order++) {
            testPluralize(samples.get(order), order);
        }
        for (int order = 0; order < categories; order++) {
            testIsPlural(samples.get(order), order);
        }
        for (int order = 0; order < categories; order++) {
            testSingularize(samples.get(order), order);
        }
    }

    // Builds a singular -> plural map from alternating singular, plural arguments.
    private static HashMap<String, String> pairs(String... words) {
        HashMap<String, String> result = new HashMap<>();
        for (int i = 0; i + 1 < words.length; i += 2) {
            result.put(words[i], words[i + 1]);
        }
        return result;
    }

    // Checks EnglishWordUtil.isSingular() against every singular form in the map.
    private static void testIsSingular(HashMap<String, String> map, int order) {
        System.out.println("\n----------------是否为单数 测试第" + order + "种类型------------------");
        for (String singular : map.keySet()) {
            if (!EnglishWordUtil.isSingular(singular)) {
                System.err.println("第" + order + "种类型 " + singular + "为单数形式,判断错误");
                continue;
            }
            System.out.println("第" + order + "种类型 " + singular + "为单数形式,判断正确");
        }
    }

    // Checks EnglishWordUtil.pluralize() against the expected plural of every entry.
    private static void testPluralize(HashMap<String, String> map, int order) {
        System.out.println("\n----------------单数变复数 测试第" + order + "种类型------------------");
        for (String singular : map.keySet()) {
            String expected = map.get(singular);
            String actual = EnglishWordUtil.pluralize(singular);
            if (!actual.equals(expected)) {
                System.err.println("第" + order + "种类型 单数:" + singular + ",复数:" + actual + ",判断错误,正确复数为:" + expected);
                continue;
            }
            System.out.println("第" + order + "种类型 单数:" + singular + ",复数:" + actual + ",判断正确");
        }
    }

    // Checks EnglishWordUtil.isPlural() against every plural form in the map.
    private static void testIsPlural(HashMap<String, String> map, int order) {
        System.out.println("\n----------------是否为复数 测试第" + order + "种类型------------------");
        for (String key : map.keySet()) {
            String plural = map.get(key);
            if (!EnglishWordUtil.isPlural(plural)) {
                System.err.println("第" + order + "种类型 " + plural + "为复数形式,判断错误");
                continue;
            }
            System.out.println("第" + order + "种类型 " + plural + "为复数形式,判断正确");
        }
    }

    // Checks EnglishWordUtil.singularize() against the expected singular of every entry.
    private static void testSingularize(HashMap<String, String> map, int order) {
        System.out.println("\n----------------复数变单数 测试第" + order + "种类型------------------");
        for (String expected : map.keySet()) {
            String plural = map.get(expected);
            String actual = EnglishWordUtil.singularize(plural);
            if (!actual.equals(expected)) {
                System.err.println("第" + order + "种类型 复数:" + plural + ",单数:" + actual + ",判断错误,正确单数为:" + expected);
                continue;
            }
            System.out.println("第" + order + "种类型 复数:" + plural + ",单数:" + actual + ",判断正确");
        }
    }
}
测试结果截图: