需求:给定一个文本,包含若干生物蛋白质信息字符串,要求截取特定信息字符串。
方法:利用 Java 语言 String 类的方法(contains、indexOf、substring)来操作。
关键代码:
//流程:1、读入文本数据;2、判断是否包含特定字符串信息;3、截取特定字符串;
//4、去除空格、制表符等无关字符;5、写回到目标文本
str.contains("……");
str.indexOf(……);
//获取系统换行符:
System.getProperty("line.separator")
具体完整代码:
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Extracts the text found between a {@code /translation} tag and the following
 * {@code /product} tag in {@code 1.txt} and writes each extracted sequence to
 * {@code 3.txt} as a two-line record:
 *
 * <pre>
 * &gt;hypothetical protein
 * SEQUENCE
 * </pre>
 *
 * Input lines are concatenated (without separators) until both tags have been
 * seen; all whitespace is removed from the extracted text before it is written.
 */
public class subString {

    /** Tag that marks the start of a sequence. */
    private static final String TRANSLATION_TAG = "/translation";

    /** Tag that marks the end of a sequence. */
    private static final String PRODUCT_TAG = "/product";

    /** Matches any run of whitespace (spaces, tabs, CR, LF, ...). Compiled once. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Platform line separator used for the output file. */
    private static final String LINE_SEPARATOR = System.getProperty("line.separator");

    /**
     * Reads {@code 1.txt}, extracts every sequence and writes the result to
     * {@code 3.txt}. I/O failures are reported on the console; they are not
     * rethrown.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // try-with-resources closes (and flushes) both streams on every path,
        // including when an exception interrupts the loop.
        try (BufferedReader br = new BufferedReader(new FileReader("1.txt"));
             BufferedWriter wr = new BufferedWriter(new FileWriter("3.txt"))) {
            StringBuilder buffer = new StringBuilder();
            String line;
            while ((line = br.readLine()) != null) {
                buffer.append(line);

                int preIndex = buffer.indexOf(TRANSLATION_TAG);
                if (preIndex < 0) {
                    continue;
                }
                // Only accept a /product that comes AFTER the /translation tag;
                // an earlier /product would otherwise yield end < start and
                // make substring() throw.
                int tagEnd = preIndex + TRANSLATION_TAG.length();
                int endIndex = buffer.indexOf(PRODUCT_TAG, tagEnd);
                if (endIndex < 0) {
                    continue;
                }

                // Skip the single character following the tag (the '=' in
                // "/translation=..."), but never step past the end marker.
                int start = Math.min(tagEnd + 1, endIndex);
                Matcher m = WHITESPACE.matcher(buffer.substring(start, endIndex));
                String dest = m.replaceAll("");

                wr.write(">hypothetical protein" + LINE_SEPARATOR + dest);
                wr.write(LINE_SEPARATOR);

                // Start the next record from an empty buffer (assigning null
                // here would turn the next append into the text "null...").
                buffer.setLength(0);
            }
        } catch (FileNotFoundException e) {
            System.out.println("not found file 1.txt");
            e.printStackTrace();
        } catch (IOException e) {
            System.out.println("read error");
            e.printStackTrace();
        }
        System.out.println("end");
    }
}