关闭

将网页中的table导入excel2007

210人阅读 评论(0) 收藏 举报

公司为了测试单板的性能,使用firefox工具查看每个请求的,因此使用了firebug的YSlow工具,但是每个选项都必须复制黏贴很是麻烦,因此这里写了一个小程序,将网页中的table导入到excel中

 

以前使用的jxl.jar这个工具包不支持office2007,为了解决这方面的问题,可以使用apache的POI。

 

现在大家已经慢慢的熟悉了office2007的操作,因此我也就用07作为示范了例子

 

模拟一个table例子

<html> 
<head> 
<TITLE>class_obj_js_class</TITLE> 
<body > 
    <table>
		<tr>
			<td style="color:#00ff00">01</td>
			<td>02</td>
			<td>03</td>
		</tr>
		<tr>
			<td>11
			</td>
			<td>12</td>
			<td>13</td>
		</tr>
		<tr>
			<td style="color:#ff0000">2
			1</td>
			<td>22</td>
			<td>23</td>
		</tr>
	</table>
</body> 
</html>

 

 

读取网页中的table

package hb.com;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

public class TableToExcel {

	//因为网页中可能对标签设置样式,因此只能根据前面的一部分可以判断
	private static String ROW_FLAG_BEGIN = "<tr";
	private static String ROW_FLAG_END = "</tr";
	private static String COL_FLAG_BEGIN = "<td";
	private static String COL_FLAG_END = "</td";
	private static int NOT_HAS_STRING = -1;
	
	public static void main(String[] args) {
		
		try {
			BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream("temp.html")));
			String temp;
			String result = "";
			//XSSFWorkbook创建的对象可以操作office2007,而HSSFWorkbook()创建的对象只能控制2003之前的版本
			Workbook wb = new XSSFWorkbook();
//			Workbook wb = new HSSFWorkbook();
			Sheet sheet = wb.createSheet("huangbiao test");
			int begin,end;
			int rowNum = -1, colNum = -1;
			Row row = null;
			Cell cell = null;
			//判断某一列内容是否已经结束
			boolean hasContent = false;
			
			while((temp = br.readLine()) != null){
				begin = temp.indexOf(TableToExcel.COL_FLAG_BEGIN);
				end  = temp.indexOf(TableToExcel.COL_FLAG_END);
				if(temp.indexOf(TableToExcel.ROW_FLAG_BEGIN) != TableToExcel.NOT_HAS_STRING){
					rowNum = rowNum + 1;
					colNum = -1;
					row = sheet.createRow(rowNum);
				}else{
					
				}
				//当前行有<td>标签
				if(begin != TableToExcel.NOT_HAS_STRING)
				{
					colNum = colNum + 1;
					hasContent = true;
					cell = row.createCell(colNum);
					//表明<td></td>两个标签在同一行
					if(end != TableToExcel.NOT_HAS_STRING)
					{
						result = TableToExcel.getInSameRow(temp);
						cell.setCellValue(result);
						hasContent = false;
					}else{
						//表明<td></td>两个标签不在同一行,且只有开始标签,没有结束标签
						result = TableToExcel.getInDiffRow(temp);
					}
					
				}
				//当前行没有<td>标签
				else{
					//当前行有</td>标签且没有开始标签,需要把前面的数据插入到excel中
					if(end != TableToExcel.NOT_HAS_STRING)
					{
						result = result + TableToExcel.getInBeforRow(temp);
						cell.setCellValue(result);
						hasContent = false;
					}
				}
			}
			
			// Write the output to a file
		    FileOutputStream fileOut = new FileOutputStream("test.xlsx");
		    wb.write(fileOut);
		    fileOut.close();
			
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	//获取<td></td>标签中间的字符串
	public static String getInSameRow(String str){
		//<(.*)> .* <\/\1> 
		Pattern p = Pattern.compile("(<td(.*)>)(.*)<\\/td>");
		Matcher m = p.matcher(str);
		//m.find()方法一定不能少,否则后面的group()方法通不过
		System.out.println(m.find());
		System.out.println(m.group(3));
		return m.group(3);
	}
	
	//获取<td标签后面的字符串
	public static String getInDiffRow(String str){
		//<(.*)> .* <\/\1> 
		Pattern p = Pattern.compile("(<td(.*)>)(.*)");
		Matcher m = p.matcher(str);
		//m.find()方法一定不能少,否则后面的group()方法通不过
		System.out.println(m.find());
		System.out.println(m.group(3));
		return m.group(3);
	}
	
	//获取结束行前面的字符串
	public static String getInBeforRow(String str){
		//<(.*)> .* <\/\1> 
		Pattern p = Pattern.compile("(.*)<\\/td>");
		Matcher m = p.matcher(str);
		//m.find()方法一定不能少,否则后面的group()方法通不过
		System.out.println(m.find());
		System.out.println(m.group(1));
		return m.group(1);
	}
}

 

读取excel中的数据,然后做相应的修改

package hb.com;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;

import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

public class ReadExcel {
	
	public static String fileName = "test.xlsx";
	
	public static void main(String[] args) {
		ReadExcel.readAndRewrite(fileName);
	}
	
	public static void readAndRewrite(String fileName){
		InputStream inp;
		try {
			inp = new FileInputStream(fileName);
			Workbook wb = WorkbookFactory.create(inp);
			//遍历所有的数据
			ReadExcel.IteratorRowCell(wb);
			//将excel的第一行第一列的数据改为huangbiao
		    Sheet sheet = wb.getSheetAt(0);
		    Row row = sheet.getRow(0);
		    Cell cell = row.getCell(0);
		    //给指定的单元格设定值
		    if (cell != null){
		    	cell.setCellValue("huangbiao");
		    }
		    //设定完值之后然后通过数据流写入文件中保存起来
		    FileOutputStream fileOut = new FileOutputStream(fileName);
		    wb.write(fileOut);
		    fileOut.close();
		} catch (FileNotFoundException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} catch (InvalidFormatException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} catch (IOException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
	}
	
	//遍历所有的数据
	public static void IteratorRowCell(Workbook wb){
		Sheet sheet = wb.getSheetAt(0);
		for (Iterator<Row> rit = sheet.rowIterator(); rit.hasNext(); ) {
			Row row = rit.next();
			for (Iterator<Cell> cit = row.cellIterator(); cit.hasNext(); ) {
				Cell cell = cit.next();
				// Do something here
				System.out.println(cell);
			}
		}
	}
}

 

0
0

查看评论
* 以上用户言论只代表其个人观点,不代表CSDN网站的观点或立场
    个人资料
    • 访问:311326次
    • 积分:8728
    • 等级:
    • 排名:第2259名
    • 原创:1096篇
    • 转载:14篇
    • 译文:0篇
    • 评论:5条
    最新评论