1.主方法(被注释掉的代码多用于测试和方法说明,打印语句也主要用于调试)
package com.tylg.test1;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import org.apache.jena.ontology.DatatypeProperty;
import org.apache.jena.ontology.Individual;
import org.apache.jena.ontology.ObjectProperty;
import org.apache.jena.ontology.OntClass;
import org.apache.jena.ontology.OntModel;
import org.apache.jena.ontology.OntModelSpec;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.util.iterator.ExtendedIterator;
/**
 * Command-line driver that parses two ontology files and prints, for each
 * entity kind (classes, datatype properties, object properties), the NGram
 * and cosine similarity matrices between the names found in the two files.
 *
 * <p>Rows of every matrix correspond to names from the first file and columns
 * to names from the second file.
 */
public class Test {
    /** N-gram size passed to the NGram similarity computation. */
    private static final int NGRAM_SIZE = 3;

    /**
     * Parses both ontologies and prints all six similarity matrices.
     *
     * @param args optional; when at least two arguments are supplied they are
     *             used as the paths of the first and second ontology file,
     *             otherwise the built-in default paths are used
     */
    public static void main(String[] args) {
        OntologyParsing op = new OntologyParsing();
        String file101 = "D:\\eclipse\\testing\\101\\onto.rdf";
        String file201 = "D:\\eclipse\\testing\\201\\onto.rdf";
        if (args != null && args.length >= 2) {
            file101 = args[0];
            file201 = args[1];
        }
        System.out.println("我们要解析的文件是:" + file101);
        System.out.println("我们要解析的文件是:" + file201);

        // Parse every entity list exactly once; the results are reused for
        // both similarity measures instead of re-reading the files each time.
        String[] classes101 = parsingOfClassAboutSpecifiedFile(op, file101);
        String[] classes201 = parsingOfClassAboutSpecifiedFile(op, file201);
        String[] datatypeProps101 = parsingOfDatatypeProperityAboutSpecifiedFile(op, file101);
        String[] datatypeProps201 = parsingOfDatatypeProperityAboutSpecifiedFile(op, file201);
        String[] objectProps101 = parsingOfObjectProperityAboutSpecifiedFile(op, file101);
        String[] objectProps201 = parsingOfObjectProperityAboutSpecifiedFile(op, file201);

        double[][] classesAboutNGramLMATRIX =
                VectorSpaceMatrix.vectorSpaceMatrixOfNGramL(classes101, classes201, NGRAM_SIZE);
        // Datatype properties are compared with datatype properties (the rows
        // previously came from the class list by mistake).
        double[][] datatypeproperityAboutNGramLMATRIX =
                VectorSpaceMatrix.vectorSpaceMatrixOfNGramL(datatypeProps101, datatypeProps201, NGRAM_SIZE);
        double[][] objectproperityAboutNGramLMATRIX =
                VectorSpaceMatrix.vectorSpaceMatrixOfNGramL(objectProps101, objectProps201, NGRAM_SIZE);

        System.out.println("\n*****************************\n* 关于NGramL的相似度矩阵 *\n*****************************");
        System.out.println("\n关于class的NGramL相似度矩阵如下:");
        printMatrix(classesAboutNGramLMATRIX);
        System.out.println("\n关于datatypeproperity的NGramL相似度矩阵如下:");
        printMatrix(datatypeproperityAboutNGramLMATRIX);
        System.out.println("\n关于objectproperity的NGramL相似度矩阵如下:");
        printMatrix(objectproperityAboutNGramLMATRIX);

        double[][] classesAboutCosSimilarMATRIX =
                VectorSpaceMatrix.vectorSpaceMatrixOfCosSimilar(classes101, classes201);
        double[][] datatypeproperityAboutCosSimilarMATRIX =
                VectorSpaceMatrix.vectorSpaceMatrixOfCosSimilar(datatypeProps101, datatypeProps201);
        double[][] objectproperityAboutCosSimilarMATRIX =
                VectorSpaceMatrix.vectorSpaceMatrixOfCosSimilar(objectProps101, objectProps201);

        System.out.println("\n\n********************************\n* 关于CosSimilar的相似度矩阵 *\n********************************");
        System.out.println("\n关于class的CosSimilar相似度矩阵如下:");
        printMatrix(classesAboutCosSimilarMATRIX);
        System.out.println("\n关于datatypeproperity的CosSimilar相似度矩阵如下:");
        printMatrix(datatypeproperityAboutCosSimilarMATRIX);
        System.out.println("\n关于objectproperity的CosSimilar相似度矩阵如下:");
        printMatrix(objectproperityAboutCosSimilarMATRIX);
    }

    /** Prints one matrix row per line, each value followed by a single space. */
    private static void printMatrix(double[][] matrix) {
        for (double[] row : matrix) {
            for (double value : row) {
                System.out.print(value + " ");
            }
            System.out.println();
        }
    }

    /**
     * Returns the object property names extracted from the given ontology file.
     *
     * @param op    parser used to extract the names
     * @param file1 path of the ontology file to read
     * @return the names reported by {@code op.weGetObjectProperties}
     */
    public static String[] parsingOfObjectProperityAboutSpecifiedFile(OntologyParsing op, String file1) {
        // The model is built once; the old code built it twice per call.
        return op.weGetObjectProperties(OntologyParsing.buildModel(file1));
    }

    /**
     * Returns the datatype property names extracted from the given ontology file.
     *
     * @param op    parser used to extract the names
     * @param file1 path of the ontology file to read
     * @return the names reported by {@code op.weGetDatatypeProperities}
     */
    public static String[] parsingOfDatatypeProperityAboutSpecifiedFile(OntologyParsing op, String file1) {
        return op.weGetDatatypeProperities(OntologyParsing.buildModel(file1));
    }

    /**
     * Returns the class names extracted from the given ontology file.
     *
     * @param op    parser used to extract the names
     * @param file1 path of the ontology file to read
     * @return the names reported by {@code op.weGetClasses}
     */
    public static String[] parsingOfClassAboutSpecifiedFile(OntologyParsing op, String file1) {
        return op.weGetClasses(OntologyParsing.buildModel(file1));
    }
}
2.该类是为了提供关于余弦相似度和NGramL的计算,输入都为两个字符串数组,输出为二维数组(即相似度矩阵)
package com.tylg.test1;
/**
 * Builds pairwise similarity matrices between two arrays of strings.
 *
 * <p>In every returned matrix, cell {@code [i][j]} holds the similarity of
 * {@code s1[i]} and {@code s2[j]} as computed by {@link WaysOfSimilary}.
 */
public class VectorSpaceMatrix {
    /**
     * Computes the NGram similarity of every pair drawn from the two arrays.
     *
     * @param s1 strings that index the rows of the result
     * @param s2 strings that index the columns of the result
     * @param n  n-gram size forwarded to {@code WaysOfSimilary.NGramL}
     * @return a matrix of size {@code s1.length x s2.length}
     */
    public static double [][] vectorSpaceMatrixOfNGramL(String []s1, String []s2, int n) {
        double[][] matrix = new double[s1.length][s2.length];
        int row = 0;
        for (String left : s1) {
            double[] cells = matrix[row++];
            int col = 0;
            for (String right : s2) {
                cells[col++] = WaysOfSimilary.NGramL(left, right, n);
            }
        }
        return matrix;
    }

    /**
     * Computes the cosine similarity of every pair drawn from the two arrays.
     *
     * @param s1 strings that index the rows of the result
     * @param s2 strings that index the columns of the result
     * @return a matrix of size {@code s1.length x s2.length}
     */
    public static double[][] vectorSpaceMatrixOfCosSimilar(String []s1, String []s2) {
        double[][] matrix = new double[s1.length][s2.length];
        int row = 0;
        for (String left : s1) {
            double[] cells = matrix[row++];
            int col = 0;
            for (String right : s2) {
                cells[col++] = WaysOfSimilary.cosSimilar(left, right);
            }
        }
        return matrix;
    }
}
3.将指定文件的本体进行解析:第一个方法用于构建本体模型,后面三个方法为解析方法,分别解析 class、DatatypeProperty 以及 ObjectProperty。
package com.tylg.test1;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.jena.ontology.DatatypeProperty;
import org.apache.jena.ontology.ObjectProperty;
import org.apache.jena.ontology.OntClass;
import org.apache.jena.ontology.OntModel;
import org.apache.jena.ontology.OntModelSpec;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.util.iterator.ExtendedIterator;
/**
 * Loads an ontology file into a Jena {@link OntModel} and extracts the local
 * names of its classes, datatype properties and object properties.
 *
 * <p>Only entities whose URI is non-null and starts with
 * {@code "http://oaei"} are reported.
 */
public class OntologyParsing {
    /** URI prefix an entity must have to be included in the results. */
    private static final String URI_PREFIX = "http://oaei";

    /**
     * Creates an in-memory OWL model and reads the given file into it.
     *
     * <p>If the file cannot be opened or closed, the error is reported on
     * standard error and the model is returned with whatever was read
     * (an empty model when the file does not exist).
     *
     * @param s path of the ontology file to read
     * @return the populated (or empty, on I/O failure) ontology model
     */
    public static OntModel buildModel(String s) {
        OntModel ontModel = ModelFactory.createOntologyModel(OntModelSpec.OWL_MEM);
        // try-with-resources guarantees the stream is closed even if read() throws.
        try (FileInputStream in = new FileInputStream(s)) {
            ontModel.read(in, "");
        } catch (IOException e) {
            System.err.println("Failed to read ontology file: " + s);
            e.printStackTrace();
        }
        return ontModel;
    }

    /**
     * Lists the local names of the named classes in the model.
     *
     * @param ontModel model to inspect
     * @return local names of all classes whose URI starts with {@code "http://oaei"}
     */
    public String[] weGetClasses(OntModel ontModel) {
        List<String> names = new ArrayList<>();
        ExtendedIterator<OntClass> it = ontModel.listClasses();
        while (it.hasNext()) {
            addLocalName(names, it.next().getURI());
        }
        return names.toArray(new String[0]);
    }

    /**
     * Lists the local names of the datatype properties in the model.
     *
     * @param ontModel model to inspect
     * @return local names of all datatype properties whose URI starts with
     *         {@code "http://oaei"}
     */
    public String[] weGetDatatypeProperities(OntModel ontModel) {
        List<String> names = new ArrayList<>();
        ExtendedIterator<DatatypeProperty> it = ontModel.listDatatypeProperties();
        while (it.hasNext()) {
            addLocalName(names, it.next().getURI());
        }
        return names.toArray(new String[0]);
    }

    /**
     * Lists the local names of the object properties in the model.
     *
     * @param ontModel model to inspect
     * @return local names of all object properties whose URI starts with
     *         {@code "http://oaei"}
     */
    public String[] weGetObjectProperties(OntModel ontModel) {
        List<String> names = new ArrayList<>();
        ExtendedIterator<ObjectProperty> it = ontModel.listObjectProperties();
        while (it.hasNext()) {
            addLocalName(names, it.next().getURI());
        }
        return names.toArray(new String[0]);
    }

    /**
     * Appends the local name of {@code uri} to {@code names} when the URI is
     * non-null and starts with the accepted prefix.
     *
     * <p>The local name is the text after the first {@code '#'}. A URI without
     * a {@code '#'} previously caused a {@code StringIndexOutOfBoundsException};
     * it now falls back to the text after the last {@code '/'}.
     */
    private static void addLocalName(List<String> names, String uri) {
        if (uri == null || !uri.startsWith(URI_PREFIX)) {
            return;
        }
        int hash = uri.indexOf('#');
        int start = (hash >= 0) ? hash + 1 : uri.lastIndexOf('/') + 1;
        names.add(uri.substring(start));
    }
}
4. 以下两个方法分别计算两个字符串的NGramL相似度和余弦相似度,返回值为 [0, 1] 区间上的一个小数(需要调用自定义数学工具类中的小数位数保留方法)
package com.tylg.test1;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.lucene.search.spell.NGramDistance;
import com.tylg.math.MathTool;
/**
 * String similarity measures: an n-gram based score backed by Lucene's
 * {@link NGramDistance} and a character-frequency cosine similarity.
 * Both results are rounded through {@code MathTool.decimalType}.
 */
public class WaysOfSimilary {
    /**
     * Computes the n-gram similarity of two strings.
     *
     * @param a first string
     * @param b second string
     * @param n n-gram size used to construct the {@link NGramDistance}
     * @return the value of {@code NGramDistance.getDistance(a, b)} after rounding
     */
    public static double NGramL(String a, String b, int n) {
        NGramDistance ng = new NGramDistance(n);
        return MathTool.decimalType(ng.getDistance(a, b));
    }

    /**
     * Computes the cosine similarity of two strings, treating each string as
     * a vector of per-character occurrence counts.
     *
     * <p>When either string is empty its vector has zero length and the
     * cosine is undefined; {@code 0.0} is returned in that case instead of
     * dividing by zero (which previously produced NaN and made the rounding
     * step throw {@code NumberFormatException}).
     *
     * @param str1 first string
     * @param str2 second string
     * @return the rounded cosine similarity, or {@code 0.0} if either string is empty
     */
    public static double cosSimilar(String str1, String str2) {
        // For each character: [0] = occurrences in str1, [1] = occurrences in str2.
        Map<Character, int[]> vectorSpace = new HashMap<>();
        countCharacters(vectorSpace, str1, 0);
        countCharacters(vectorSpace, str2, 1);

        // Accumulate in long so the squared counts cannot overflow int.
        long squaredNorm1 = 0;
        long squaredNorm2 = 0;
        long dotProduct = 0;
        for (int[] counts : vectorSpace.values()) {
            squaredNorm1 += (long) counts[0] * counts[0];
            squaredNorm2 += (long) counts[1] * counts[1];
            dotProduct += (long) counts[0] * counts[1];
        }
        if (squaredNorm1 == 0 || squaredNorm2 == 0) {
            return 0.0;
        }
        double norms = Math.sqrt(squaredNorm1) * Math.sqrt(squaredNorm2);
        return MathTool.decimalType(dotProduct / norms);
    }

    /** Adds the character counts of {@code text} into slot {@code slot} of each entry. */
    private static void countCharacters(Map<Character, int[]> vectorSpace, String text, int slot) {
        for (char c : text.toCharArray()) {
            int[] counts = vectorSpace.get(c);
            if (counts == null) {
                counts = new int[2];
                vectorSpace.put(c, counts);
            }
            counts[slot]++;
        }
    }
}
5. 该类是为了提供一些自定义的数学方法,decimalType就是保留小数位数的方法。
package com.tylg.math;
import java.math.BigDecimal;
/**
 * Small collection of numeric helper methods.
 */
public class MathTool {
    /**
     * Rounds a value to two decimal places using half-up rounding.
     *
     * <p>The value is converted with {@link BigDecimal#valueOf(double)}, which
     * uses the canonical decimal text of the double, so an input written as
     * {@code 1.005} rounds to {@code 1.01}. The {@code new BigDecimal(double)}
     * constructor used before rounds the exact binary expansion
     * (1.00499999...) and gave {@code 1.0}.
     *
     * <p>NaN and infinite inputs cannot be represented as a {@code BigDecimal};
     * they are returned unchanged instead of raising
     * {@code NumberFormatException}.
     *
     * @param x value to round
     * @return {@code x} rounded to two decimals, or {@code x} itself when it
     *         is NaN or infinite
     */
    public static double decimalType(double x) {
        if (Double.isNaN(x) || Double.isInfinite(x)) {
            return x;
        }
        // Fully qualified so this block does not depend on an extra import.
        return BigDecimal.valueOf(x)
                .setScale(2, java.math.RoundingMode.HALF_UP)
                .doubleValue();
    }
}