这个方法是我程序的瓶颈。(或者说,至少程序的大部分运行时间都花在了这里。)
其目的是根据字符的出现情况来计算两个字符串之间的相似度:某个字符是同时出现在两个字符串中,还是只出现在其中一个字符串而不出现在另一个字符串中。
代码是正确的,但我想对它进行优化。您是否发现了任何低效之处、不良做法或不必要的复杂性?
(不必在意 Gene 类。这里真正需要关注的是 Gene 对象所携带的字符串。)
/**
 * Scores how much two genes differ, based on how often each character of
 * {@code Constants.DNA_CHARS} occurs in their DNA strings.
 *
 * <p>For every character of the alphabet:
 * <ul>
 *   <li>if it occurs in exactly one of the two strings, its occurrence count
 *       is added to the {@code Constants.DDCOST} multiplier;</li>
 *   <li>if it occurs in both strings, the absolute difference of the two
 *       counts is added to the {@code Constants.DCOST} multiplier and the
 *       smaller count is added to the {@code Constants.SCOST} multiplier.</li>
 * </ul>
 *
 * @param g0 the first gene
 * @param g1 the second gene
 * @return 0 when {@code g0.equals(g1)}; otherwise the sum of the three
 *         multipliers weighted by their respective cost constants
 */
public static int simpleScore(Gene g0, Gene g1) {
    if (g0.equals(g1)) { // identical genes have distance 0
        return 0;
    }
    String frag0 = g0.getDNA();
    String frag1 = g1.getDNA();
    int multSCOST = 0;
    int multDCOST = 0;
    int multDDCOST = 0;
    for (char c : Constants.DNA_CHARS) { // look through all acceptable DNA chars
        // Count once per fragment and keep the results in primitive ints.
        // A count of zero already says "absent", so no extra indexOf scans
        // and no boxed map lookups are needed.
        String symbol = String.valueOf(c);
        int count0 = StringUtils.countMatches(frag0, symbol);
        int count1 = StringUtils.countMatches(frag1, symbol);
        if (count0 > 0 && count1 > 0) {
            // the char appears in both frags
            multDCOST += Math.abs(count0 - count1);
            multSCOST += Math.min(count0, count1);
        } else {
            // the char appears in at most one frag; when it appears in
            // neither, this adds zero
            multDDCOST += count0 + count1;
        }
    }
    return Constants.SCOST * multSCOST + Constants.DDCOST * multDDCOST + Constants.DCOST * multDCOST;
}
根据回复更新了代码
这带来了相当可观的加速。感谢所有发表评论的人。大家还有其他想法吗?
/**
 * Computes a difference score for two genes from per-character occurrence
 * counts of their DNA strings over {@code Constants.DNA_CHARS}.
 *
 * <p>A character found in only one string contributes its count to the
 * {@code Constants.DDCOST} term. A character found in both contributes the
 * absolute count difference to the {@code Constants.DCOST} term and the
 * smaller count to the {@code Constants.SCOST} term.
 *
 * @param g0 the first gene
 * @param g1 the second gene
 * @return 0 when {@code g0.equals(g1)}, otherwise the weighted sum of the
 *         three terms
 */
public static int simpleScore(Gene g0, Gene g1) {
    // Equal genes score 0 by definition; skip all counting.
    if (g0.equals(g1)) {
        return 0;
    }

    String dna0 = g0.getDNA();
    String dna1 = g1.getDNA();
    int[] tally0 = countOccurrences(dna0, Constants.DNA_CHARS);
    int[] tally1 = countOccurrences(dna1, Constants.DNA_CHARS);

    int sharedTotal = 0;    // multiplier for SCOST
    int differTotal = 0;    // multiplier for DCOST
    int exclusiveTotal = 0; // multiplier for DDCOST

    for (int slot = 0; slot < Constants.DNA_CHARS.length; slot++) {
        int n0 = tally0[slot];
        int n1 = tally1[slot];
        if (n0 != 0 && n1 != 0) {
            // Character occurs in both fragments.
            differTotal += Math.abs(n0 - n1);
            sharedTotal += Math.min(n0, n1);
        } else if (n0 + n1 > 0) {
            // At least one count is zero here, so a positive sum means the
            // character occurs in exactly one fragment.
            exclusiveTotal += n0 + n1;
        }
    }

    return Constants.SCOST * sharedTotal + Constants.DDCOST * exclusiveTotal + Constants.DCOST * differTotal;
}
// from MAK on SO
private static int[] countOccurrences(String x, char[] validDNAChars){
int[] count=new int[validDNAChars.length];
for(int i=0;i
int index=-1;
for(int j=0;j
if (x.charAt(i)==validDNAChars[j]){
index=j;
}
}
if (index>=0) count[index]++;
}
return count;
}