// My first small Java program -- recorded here because it is my very first piece of code.
// ---------------------------------------------------------------------------------------- Suzhou
import java.io.*;
/**
 * Small string-based helpers for cleaning up a saved HTML page and pulling
 * out the text that sits next to a known label.
 *
 * <p>All searching is done with plain {@code indexOf}/{@code lastIndexOf}
 * on the raw text; no HTML parsing is performed. Methods that look something
 * up return {@code null} when nothing suitable is found.
 */
public class TestTxt2
{
    /**
     * Removes a candidate substring from the page title.
     *
     * <p>The title is the text between the first {@code <title>} and the
     * following {@code </title>} (exact, lower-case match). {@code str} is
     * deleted from that text, and then every occurrence of the old title text
     * in {@code source} is replaced by the shortened one - not only the
     * occurrence inside the title element.
     *
     * @param source the HTML text; must not be {@code null}
     * @param str    the substring to delete from the title
     * @return the rewritten text, or {@code source} unchanged when no
     *         {@code <title>...</title>} pair is present
     */
    public String deleteTitle(String source, String str)
    {
        int open = source.indexOf("<title>");
        int from = open + 7; // first character after "<title>"
        // Look for the closing tag only after the opening one, so a stray
        // earlier "</title>" cannot produce an inverted range.
        int end = (open == -1) ? -1 : source.indexOf("</title>", from);

        String newSource = source;
        if (end != -1)
        {
            String title = source.substring(from, end);
            System.out.println(title);
            String newTitle = title.replace(str, "");
            newSource = source.replace(title, newTitle);
        }
        System.out.println("the founction of deleTitle is used :");
        return newSource;
    }

    /**
     * Strips everything between the first space and the closing {@code >} of
     * each tag, so {@code <div class="a">} becomes {@code <div>}.
     *
     * <p>Only a single space character is recognised as the separator; tags
     * without a space before their {@code >} are copied unchanged. Any
     * {@code <...>} run that contains a space is treated this way, whatever
     * its content. The text is processed in one left-to-right pass.
     *
     * @param source the HTML text; must not be {@code null}
     * @return the text with tag arguments removed
     */
    public String deleteArguments(String source)
    {
        StringBuilder result = new StringBuilder(source.length());
        int copiedUpTo = 0; // everything before this index is already in result
        int locate = source.indexOf('<');
        while (locate != -1)
        {
            int from = source.indexOf(' ', locate);
            int end = source.indexOf('>', locate);
            if (from != -1 && from < end)
            {
                // Keep "<name", drop " args", and close the tag again.
                result.append(source, copiedUpTo, from).append('>');
                copiedUpTo = end + 1;
                locate = source.indexOf('<', copiedUpTo);
            }
            else
            {
                locate = source.indexOf('<', locate + 1);
            }
        }
        result.append(source, copiedUpTo, source.length());
        System.out.println("the founction of deleArguments is used :");
        return result.toString();
    }

    /**
     * Finds the first non-blank text that follows {@code str}.
     *
     * <p>Starting at the first {@code >} at or after the first occurrence of
     * {@code str}, each stretch of text between a {@code >} and the next
     * {@code <} is examined in turn; the first one that is not empty after
     * trimming is returned (untrimmed).
     *
     * @param source the HTML text; must not be {@code null}
     * @param str    the label to search for
     * @return the following text, or {@code null} when {@code str} does not
     *         occur or no non-blank text follows it
     */
    public String searchTheNext(String source, String str)
    {
        int locate = source.indexOf(str);
        if (locate == -1)
        {
            return null;
        }
        int tagClose = source.indexOf(">", locate);
        while (tagClose != -1)
        {
            int from = tagClose + 1;
            int end = source.indexOf("<", from);
            if (end == -1)
            {
                // Ran off the end of the document without finding a text node.
                return null;
            }
            String strTheNext = source.substring(from, end);
            if (strTheNext.trim().length() != 0)
            {
                System.out.println(" the founction of searchTheNext is used ");
                return strTheNext;
            }
            tagClose = source.indexOf(">", end);
        }
        return null;
    }

    /**
     * Finds the nearest non-blank text that precedes {@code str}.
     *
     * <p>Starting at the last {@code <} at or before the first occurrence of
     * {@code str}, each stretch of text between a {@code <} and the preceding
     * {@code >} is examined, walking backwards; the first one that is not
     * empty after trimming is returned (untrimmed).
     *
     * @param source the HTML text; must not be {@code null}
     * @param str    the label to search for
     * @return the preceding text, or {@code null} when {@code str} does not
     *         occur or no non-blank text precedes it
     */
    public String searchTheLast(String source, String str)
    {
        int locate = source.indexOf(str);
        if (locate == -1)
        {
            return null;
        }
        int end = source.lastIndexOf("<", locate);
        while (end != -1)
        {
            // lastIndexOf yields -1 when there is no earlier '>', so the
            // candidate then starts at the beginning of the document.
            int from = source.lastIndexOf(">", end) + 1;
            String strTheLast = source.substring(from, end);
            if (strTheLast.trim().length() != 0)
            {
                System.out.println(" the founction of searchTheLast is used ");
                return strTheLast;
            }
            // from - 1 is the '>' just skipped; a negative index gives -1
            // and ends the loop.
            end = source.lastIndexOf("<", from - 1);
        }
        return null;
    }

    /**
     * Extracts the fragment surrounding {@code str} and the text that follows it.
     *
     * <p>The fragment starts at the nearest closing tag ({@code </...}) before
     * {@code str}, or at the beginning of the document when there is none.
     * It ends just before the first tag that is not a closing tag after the
     * text returned by {@link #searchTheNext(String, String)}, or at the end
     * of the document when there is none.
     *
     * @param source the HTML text; must not be {@code null}
     * @param str    the label to search for
     * @return the extracted fragment, or {@code null} when
     *         {@code searchTheNext} finds nothing
     */
    public String toCatch(String source, String str)
    {
        String theNext = searchTheNext(source, str);
        System.out.println(theNext);
        if (theNext == null)
        {
            return null;
        }
        int locateStr = source.indexOf(str);
        int locateTheNext = source.indexOf(theNext, locateStr);

        // Skip forward over the run of closing tags that follows theNext.
        int end = source.indexOf("<", locateTheNext);
        while (end != -1 && isClosingTagAt(source, end))
        {
            end = source.indexOf("<", end + 1);
        }
        if (end == -1)
        {
            end = source.length();
        }

        // Walk backwards until a closing tag is found.
        int from = source.lastIndexOf("<", locateStr);
        while (from != -1 && !isClosingTagAt(source, from))
        {
            from = source.lastIndexOf("<", from - 1);
        }
        if (from == -1)
        {
            from = 0;
        }

        return source.substring(from, end);
    }

    /** Tells whether the {@code <} at {@code index} is immediately followed by {@code /}. */
    private static boolean isClosingTagAt(String source, int index)
    {
        return index + 1 < source.length() && source.charAt(index + 1) == '/';
    }

    /**
     * Reads a whole UTF-8 text file into a string.
     *
     * <p>Every line is terminated with {@code "\r\n"} in the result,
     * whatever line terminator the file used.
     *
     * @param path the file to read
     * @return the file content, or {@code null} when reading fails (the
     *         exception is printed to standard output)
     */
    public String read(String path)
    {
        BufferedReader reader = null;
        try
        {
            File f = new File(path);
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(f), "UTF-8"));
            StringBuilder htmlSource = new StringBuilder();
            String line = reader.readLine();
            while (line != null)
            {
                htmlSource.append(line).append("\r\n");
                line = reader.readLine();
            }
            return htmlSource.toString();
        }
        catch (Exception e)
        {
            System.out.println(e);
            return null;
        }
        finally
        {
            // Closing the outer reader also closes the wrapped streams.
            closeAndReport(reader);
        }
    }

    /**
     * Writes a string to a file as UTF-8, replacing any existing content.
     *
     * <p>I/O failures are printed to standard output, not thrown.
     *
     * @param fileName the file to write
     * @param source   the text to write
     * @throws NullPointerException if {@code source} is {@code null}; the
     *         check happens before the file is opened, so the file is left
     *         untouched
     */
    public void write(String fileName, String source)
    {
        if (source == null)
        {
            throw new NullPointerException("source");
        }
        OutputStreamWriter output = null;
        try
        {
            output = new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8");
            output.write(source);
        }
        catch (IOException e)
        {
            System.out.println(e);
        }
        finally
        {
            closeAndReport(output);
        }
    }

    /** Closes {@code resource} if it is non-null, printing any failure to standard output. */
    private static void closeAndReport(Closeable resource)
    {
        if (resource == null)
        {
            return;
        }
        try
        {
            resource.close();
        }
        catch (IOException e)
        {
            System.out.println(e);
        }
    }

    /**
     * Demo driver: reads a saved page from a fixed path, runs each helper in
     * turn and writes every intermediate result to its own file.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        try
        {
            String path = "E:\\JAVA\\Crawl\\HelloWorld\\1.txt";
            TestTxt2 t = new TestTxt2();
            String source = t.read(path);
            if (source == null)
            {
                // read() has already printed the cause.
                System.out.println("could not read " + path);
                return;
            }
            System.out.println("-----------------------");
            t.write("E:\\JAVA\\Crawl\\HelloWorld\\mydata2.txt",source);
            source = t.deleteTitle(source,"HTC Desire HD 渴望A9191 G10 800万像素 智能手机【乐天数码】- | 网购-拍拍网");
            t.write("E:\\JAVA\\Crawl\\HelloWorld\\mydata21.txt",source);
            System.out.println("-----------------------");
            System.out.println(" 去掉标签内参数 : ");
            String deleteArgumentSource = t.deleteArguments(source);
            t.write("E:\\JAVA\\Crawl\\HelloWorld\\mydata22.txt",deleteArgumentSource);
            System.out.println("-----------------------");
            String use = "交易安全";
            System.out.println("-----------------------");
            System.out.println("寻找后面对称信息 :");
            String toFindTheNext = t.searchTheNext(deleteArgumentSource,use);
            t.write("E:\\JAVA\\Crawl\\HelloWorld\\mydata23.txt",toFindTheNext);
            System.out.println("-----------------------");
            System.out.println("寻找前面对称信息 :");
            String toFindTheLast = t.searchTheLast(deleteArgumentSource,use);
            t.write("E:\\JAVA\\Crawl\\HelloWorld\\mydata24.txt",toFindTheLast);
            System.out.println("-----------------------");
            System.out.println("提取对称信息 :");
            String toCatch = t.toCatch(deleteArgumentSource,use);
            t.write("E:\\JAVA\\Crawl\\HelloWorld\\mydata25.txt",toCatch);
            System.out.println("the main is over");
        }
        catch (Exception e)
        {
            System.out.println(e);
        }
    }
}