package org.lwc.exam; import org.apache.logging.log4j.util.Strings; import java.io.*; import java.math.RoundingMode; import java.sql.SQLOutput; import java.text.DecimalFormat; import java.util.*; /** * Title: * * @author l00563781 * @version 1.0 * @Description * @since 2020/10/19 */ public class ACTG { static DecimalFormat df = new DecimalFormat( "0.00"); static boolean isLoop = false; // 非特异性指数的最大值序列 默认1,长度和拒绝文库长度相同,循环模式的第一次循环中启用 static List<Double> loopMaxNS1 = Arrays.asList( 1.0,1.0,1.0 ); // 非特异性指数的最大值序列 默认1,长度和拒绝文库长度相同,循环模式的除第一次循环外启用 static List<Double> loopMaxNS2 = Arrays.asList( 1.0,1.0,1.0,1.0,1.0 ); // 循环次数 static int loopNum = 3; // 每次循环的链长 static List<Integer> loopSizeList = Arrays.asList(3,4,5); static boolean stopFlag = false; // 需要指定的数据 static int rejNum; static ArrayList<Compos> totalCompos; static ArrayList<Result> resultArrayList; // 是否具有碱基倾向性 static boolean isTendency = false; // 碱基倾向性 static char tendency; // 输入序列的数量 static int inputNum; // 输入序列 static List<String> inputList = Arrays.asList( "AATCATG" ); // 非特异性指数的最大值序列 默认1,长度和拒绝文库长度相同 static List<Double> maxNS = Arrays.asList( 1.0,1.0,1.0 ); // 输出序列数目: -1 遍历所有可能性; i=0,遍历完结果后立即停止程序;i=n,计算出n条结果后程序停止 static int generateNum; // 输出链长,暂时认为>=3 static int resultLength; static double maxCGRate; static double minCGRate; /** * 四种碱基组合map */ static Map<Character, Character> ref = null; /** * 拒绝文库中不同碱基对增加值map */ static Map<Compos,Double> rejAccuRef = null; // 连续字符的最长数量 static Map<Character,Integer> maxSequenceRef = null; static { df.setRoundingMode(RoundingMode.HALF_UP); generateNum = -1; inputNum = 1; resultLength = 4; ref = new HashMap<>(); ref.put('A', 'T'); ref.put('C', 'G'); ref.put('G', 'C'); ref.put('T', 'A'); rejAccuRef = new HashMap<>(); rejAccuRef.put(new Compos('A','A'),1.0); rejAccuRef.put(new Compos('A','T'),1.0); rejAccuRef.put(new Compos('A','C'),1.0); rejAccuRef.put(new Compos('A','G'),1.0); rejAccuRef.put(new Compos('T','A'),1.0); rejAccuRef.put(new Compos('T','T'),1.0); rejAccuRef.put(new Compos('T','C'),1.0); rejAccuRef.put(new Compos('T','G'),1.0); rejAccuRef.put(new Compos('C','A'),1.0); rejAccuRef.put(new Compos('C','T'),1.0); rejAccuRef.put(new Compos('C','C'),1.0); rejAccuRef.put(new Compos('C','G'),1.0); rejAccuRef.put(new Compos('G','A'),1.0); rejAccuRef.put(new Compos('G','T'),1.0); rejAccuRef.put(new Compos('G','C'),1.0); rejAccuRef.put(new Compos('G','G'),1.0); maxSequenceRef = new HashMap<>(); maxSequenceRef.put('A',10); maxSequenceRef.put('T',10); maxSequenceRef.put('C',10); maxSequenceRef.put('G',10); } public static void main(String[] args) throws IOException, ClassNotFoundException { if(!isLoop){ handle(); }else { // 设置非特异型指数的最大值序列 // 设置得到的序列数量为1 // 设置本次链长 ArrayList<Result> results = new ArrayList<>(); String lastLastString = ""; String lastString = inputList.get(0); for(int i =0;i<loopNum;i++){ stopFlag = false; if(i==0){ maxNS = loopMaxNS1; }else { maxNS = loopMaxNS2; } resultLength = loopSizeList.get(i); generateNum = 1; if(lastLastString.equals("")){ inputList = Arrays.asList(lastString); }else { inputList = Arrays.asList(lastLastString,lastString); } Result result = handle(); if(result == null){ System.err.println("循环第"+(i+1)+"次失败,请检查输入"); break; } results.add(result); lastLastString = lastString; lastString = result.chain; } System.out.println("LOOP RESULT:"); for(int j = 0;j<results.size();j++){ Result result = results.get(j); System.out.println("---"+result.id+"---"+result.chain+"---"+Arrays.toString(formatDoubleArray(result.degree))+"---"+formatDouble(result.cgRate)+"---"+getReverseRef(result.chain)); } } } public static Result handle() throws IOException, ClassNotFoundException { // 初始化 maxCGRate = 0.75; minCGRate = 0.25; tendency = 'A'; isTendency = true; // 初始化拒绝文库 ArrayList<List<Compos>> ArrayList<List<Compos>> rejCompos = new ArrayList<>(); for (String string : inputList) { List<Compos> composList = getComposList(string); rejCompos.add(composList); List<Compos> composList1 = getComposList(getReverseRef(string)); rejCompos.add(composList1); } List<Compos> lastComposList = new ArrayList<>(); rejCompos.add(lastComposList); // 初始化非特异性指数数组 rejNum = rejCompos.size(); double[] degree = new double[rejNum]; // 生成全组合文库 totalCompos = null; if (isTendency) { String base = "ATCG"; StringBuilder newBase = new StringBuilder(); newBase.append(tendency); for (int i = 0; i < 4; i++) { char current = base.charAt(i); if (current == tendency) { continue; } newBase.append(current); } String baseString = newBase.toString(); totalCompos = new ArrayList<Compos>(); totalCompos.addAll(generateComposStartWith(baseString)); } else { String base = "ATCG"; totalCompos = new ArrayList<Compos>(); totalCompos.addAll(generateComposStartWith(base)); } resultArrayList = new ArrayList<Result>(); String lastChain = ""; ArrayList<List<Compos>> rejComposCopy = deepCopy(rejCompos); diGui(degree,lastChain,rejComposCopy); for(int j = 0;j<resultArrayList.size();j++){ Result result = resultArrayList.get(j); System.out.println("---"+result.id+"---"+result.chain+"---"+Arrays.toString(formatDoubleArray(result.degree))+"---"+formatDouble(result.cgRate)+"---"+getReverseRef(result.chain)); } return resultArrayList.get(0); } public static void diGui(double[] degree,String lastChain,ArrayList<List<Compos>> rejCompos) throws IOException, ClassNotFoundException { // 深度遍历,在开始设置递归路径,并且记录相关 // 初始话场景,非追加场景,不会生成result if(Strings.isBlank(lastChain)){ for(int i=0;i<totalCompos.size();i++){ ArrayList<List<Compos>> newRejCompos = deepCopy(rejCompos); double[] newDegree = degree.clone(); Compos current = totalCompos.get(i); String newChain = current.first+""+current.last; // 得到此组合的互补组合 Compos reverseCurrent = new Compos( getRef(current.last), getRef(current.first) ); // 最后一个拒绝文库获得互补组合 newRejCompos.get(rejNum - 1).add(reverseCurrent); // 判断拒绝文库中是否有此组合 for (int j = 0; j < rejNum; j++) { // 如果有, if (newRejCompos.get(j).contains(current)) { // 对应ns元素+1 // System.out.println(current); // System.out.println(newDegree[j]); // System.out.println(rejAccuRef.get(current)); newDegree[j] += rejAccuRef.get(current); // 文库移除一次此组合 Iterator<Compos> it = newRejCompos.get(j).iterator(); while (it.hasNext()) { Compos compos = (Compos) it.next(); if (compos.equals(current)) { it.remove(); break; } } } } diGui(newDegree,newChain,newRejCompos); } } else { if(stopFlag){ return; } // 首先对上一次的结果进行判断 // 等于规定的链长才有可能是正确结果 if (lastChain.length() == resultLength) { boolean etNumOne = false; // 判断非特异性数组中是否有大于1的元素 for(int x=0;x<degree.length;x++){ double maxValue = maxNS.get(x); if (degree[x] > maxValue) { etNumOne = true; break; } } // 通过特异性验证 if (!etNumOne) { // 计算CG碱基数量 double cgRate; int CGNum = 0; for(int index =0; index<lastChain.length();index++){ char charToCG = lastChain.charAt(index); if(charToCG == 'C' || charToCG == 'G'){ CGNum++; } } cgRate = (CGNum+0.0)/resultLength; if(cgRate>=minCGRate && cgRate<=maxCGRate){ // 成功情况, 保留此Result Result result = new Result(); result.id = resultArrayList.size()+1; result.chain = lastChain; result.cgRate = cgRate; result.degree = degree; char lastChar = '0'; int length = 1; boolean overMaxSequence = false; for(int x=0; x<lastChain.length();x++){ if(lastChar == lastChain.charAt(x)){ length++; if(length>maxSequenceRef.get(lastChar)){ overMaxSequence = true; break; } }else { length = 1; } lastChar = lastChain.charAt(x); } if(!overMaxSequence) { System.out.println("process: "+result.chain+" succeed"); resultArrayList.add(result); } if(generateNum == 0){ // 暂时看不懂这种情况 }else if(generateNum == -1){ // 继续遍历出所有情况 }else { // 设置停止flag if(resultArrayList.size()>=generateNum){ stopFlag = true; } } } } }else { // 这里的lastChain的长度一定没有到达resultLength for(int i=0;i<totalCompos.size();i++){ char lastChar = lastChain.charAt(lastChain.length()-1); Compos current = totalCompos.get(i); char currentFirstChar = current.first; if (lastChar == currentFirstChar) { ArrayList<List<Compos>> newRejCompos = deepCopy(rejCompos); double[] newDegree = degree.clone(); String newChain = lastChain + current.last; // 得到此组合的互补组合 Compos reverseCurrent = new Compos( getRef(current.last), getRef(current.first) ); // 最后一个拒绝文库获得互补组合 newRejCompos.get(rejNum - 1).add(reverseCurrent); // 判断拒绝文库中是否有此组合 for (int j = 0; j < rejNum; j++) { // 如果有, if (newRejCompos.get(j).contains(current)) { // 对应ns元素+1 newDegree[j] += rejAccuRef.get(current); // 文库移除一次此组合 Iterator<Compos> it = newRejCompos.get(j).iterator(); while (it.hasNext()) { Compos compos = (Compos) it.next(); if (compos.equals(current)) { it.remove(); break; } } } } System.out.println("process: "+newChain); diGui(newDegree, newChain, newRejCompos); } } } } } public static ArrayList deepCopy(ArrayList src) throws IOException, ClassNotFoundException { ByteArrayOutputStream byteOut = new ByteArrayOutputStream(); ObjectOutputStream out = new ObjectOutputStream(byteOut); out.writeObject(src); ByteArrayInputStream byteIn = new ByteArrayInputStream(byteOut.toByteArray()); ObjectInputStream in = new ObjectInputStream(byteIn); ArrayList dest = (ArrayList) in.readObject(); return dest; } public static ArrayList<Compos> generateComposStartWith(String string) { ArrayList<Compos> arrayList = new ArrayList<>(); for(int i=0;i<4;i++){ for(int j=0;j<4;j++){ arrayList.add(new Compos(string.charAt(i),string.charAt(j))); } } return arrayList; } /** * 获得一个序列的碱基对组合 * * @param input1 输入的碱基序列 * @return List, 碱基组合的列表 */ public static ArrayList<Compos> getComposList(String input1) { ArrayList<Compos> result = new ArrayList<>(); for (int i = 1; i < input1.length(); i++) { Compos newCompos = new Compos(input1.charAt(i - 1) , input1.charAt(i)); result.add(newCompos); } return result; } /** * 获得一个序列的反向序列 */ public static String getReverseRef(String input) { StringBuilder output = new StringBuilder(); for (int i = input.length()-1; i > -1; i--) { char current = input.charAt(i); output.append(getRef(current)); } return output.toString(); } /** * 得到碱基的互补字符 */ public static char getRef(char origin) { if (ref.containsKey(origin)) { return ref.get(origin); } else { System.out.println("输入的字符错误"); throw new RuntimeException(); } } public static String formatDouble(double d) { return df.format(d); } public static String[] formatDoubleArray(double[] ds) { String[] strings = new String[ds.length]; int i = 0; for (double d : ds) { strings[i++] = formatDouble(d); } return strings; } } /** * 两个碱基形成一个碱基组合 */ class Compos implements Comparable<Compos>,Serializable { char first; char last; public Compos(char first, char last) { this.first = first; this.last = last; } @Override public boolean equals(Object o){ if(o instanceof Compos){ Compos compos = (Compos)o; if(compos.first == this.first && compos.last==this.last){ return true; } } return false; } @Override public int compareTo(Compos o) { if (this.first == o.first && this.last == o.last) { return 0; } else { return 1; } } @Override public int hashCode(){ return this.first*100+this.last; } @Override public String toString(){ return this.first+" "+this.last; } } class Result implements Serializable{ int id; String chain; double[] degree; double cgRate; }
ATCG问题zzy bugfix1
最新推荐文章于 2022-06-21 17:05:12 发布