// 通过 java.net 访问相应 URL,过滤返回的信息,并保存到 Excel 中。利用 jexcel 包进行 Excel 文件操作;不足是 jexcel 中不好实现删除整行,建议以后使用 Java POI。
import java.io.*;
import java.io.File;
import java.util.*;
import java.text.SimpleDateFormat;
import jxl.Workbook;
import jxl.write.Label;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;
public class UrlTest {
public static void main(String args[]) {
String[] urls =null; //访问的路径名
String[] resultS=null; //返回的结果
java.net.URL l_url = null;
java.net.HttpURLConnection l_connection =null;
java.io.BufferedReader l_reader =null;
String sCurrentLine = " ";
String sTotalString = " ";
int indexNum0 =-1;
int indexNum1 =-1;
//int indexNumBegin = 0; //内容开始
//int indexNumEnd =0; //内容结束
Calendar rightNow = Calendar.getInstance();
SimpleDateFormat dateFm = new SimpleDateFormat("yyyy-MM-dd");
String time = dateFm.format(rightNow.getTime());
urls = new String[200];
resultS = new String[urls.length];
for(int i=0;i<urls.length;i++){
urls[i]="http://www.scpta.gov.cn/exam.aspx?Id="+(i+1);
}
for(int i=0;i<urls.length;i++ ){
try {
l_url = new java.net.URL(urls[i]);
l_connection = (java.net.HttpURLConnection) l_url
.openConnection();
l_connection.connect();
InputStream l_urlStream = l_connection.getInputStream();
l_reader = new java.io.BufferedReader(
new java.io.InputStreamReader(l_urlStream));
while ((sCurrentLine = l_reader.readLine()) != null) {
sTotalString += sCurrentLine;
}
/****过滤内容开始*****/
indexNum0 = sTotalString.indexOf("class=\"zhuanti_990\">");
indexNum1 = sTotalString.indexOf("</div>",indexNum0);
resultS[i] = (sTotalString.substring(indexNum0+20,indexNum1)).trim();
// System.out.println("urls"+"["+i+"]"+":"+urls[i]);
// System.out.println("页面内容:"+sTotalString);
// System.out.println("截取内容:"+resultS[i]);
sTotalString="";
/****过滤内容结束*****/
} catch (Exception e) {
// TODO: handle exception
e.printStackTrace();
}
}
try {
// 打开文件
// System.out.println("开始创建");
WritableWorkbook book = null;
String fileName ="四川人事考试专题整理("+time+").xls";
book = Workbook.createWorkbook(new File(fileName));
// 生成名为“第一页”的工作表,参数0表示这是第一页
WritableSheet sheet = book.createSheet(" 第一页 ", 0);
// 在Label对象的构造子中指名单元格位置是第一列第一行(0,0)
// 以及单元格内容为test
sheet.addCell(new Label(0, 0, "专题内容"));
sheet.addCell(new Label(1, 0, "专题对应地址"));
// 将定义好的单元格添加到工作表中
for(int i=0;i<urls.length;i++){
if(!(resultS[i].trim()).equals("")){
sheet.addCell(new Label(0, i+1, resultS[i]));
sheet.addCell(new Label(1, i+1, urls[i]));
}
}
for (int i = 0; i < urls.length; i++) {
if((resultS[i].trim()).equals("")){
sheet.removeRow(i+1);
}
}
// 写入数据并关闭文件
book.write();
book.close();
} catch (Exception e) {
System.out.println(e);
}
}
}