Java 读取 HTML 页面并解析 <table>、<tr>、<td> 标签

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.filters.OrFilter;
import org.htmlparser.tags.TableColumn;
import org.htmlparser.tags.TableRow;
import org.htmlparser.tags.TableTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

/**
 * Command-line demo that reads an HTML file from disk and prints the inner
 * text of every table cell found in it, using the HTML Parser library
 * ({@code org.htmlparser}).
 */
public class aaa2 {

	/** HTML file that is parsed when no path is given on the command line. */
	private static final String DEFAULT_HTML_PATH = "D://360安全浏览器下载/2.2.html";

	/** Charset used to decode the HTML file, and reported to the parser. */
	private static final String HTML_CHARSET = "UTF-8";

	/**
	 * Reads the HTML file and prints the content of each table cell to
	 * standard output, one cell per line, with a banner line before each table.
	 *
	 * @param args optional; {@code args[0]} is the path of the HTML file to
	 *             parse. When absent, {@link #DEFAULT_HTML_PATH} is used.
	 * @throws IOException if the file cannot be opened or read
	 */
	public static void main(String[] args) throws IOException {
		String path = (args != null && args.length > 0) ? args[0] : DEFAULT_HTML_PATH;
		String html = readFile(new File(path));

		// The text is already decoded, so the parser is given the same charset
		// name that was used to read the file (the original passed "gbk" here
		// while decoding the file as UTF-8).
		Parser parser = Parser.createParser(html, HTML_CHARSET);
		// A single class filter is enough; wrapping it in an OrFilter with one
		// predicate accepts exactly the same nodes.
		NodeFilter tableFilter = new NodeClassFilter(TableTag.class);
		try {
			printTables(parser.parse(tableFilter));
		} catch (ParserException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Reads the whole file as text.
	 *
	 * <p>Line terminators are normalised to {@code '\n'} rather than dropped, so
	 * markup that is split across lines is not fused together.
	 *
	 * @param file the file to read
	 * @return the file content decoded with {@link #HTML_CHARSET}
	 * @throws IOException if the file cannot be opened or read
	 */
	private static String readFile(File file) throws IOException {
		StringBuilder content = new StringBuilder();
		// try-with-resources closes the stream and reader even if reading fails.
		try (FileInputStream in = new FileInputStream(file);
				BufferedReader reader = new BufferedReader(new InputStreamReader(in, HTML_CHARSET))) {
			String line;
			while ((line = reader.readLine()) != null) {
				content.append(line).append('\n');
			}
		}
		return content.toString();
	}

	/**
	 * Prints every cell of every table in the given node list.
	 *
	 * @param tables nodes returned by the parser; entries that are not
	 *               {@link TableTag} instances are skipped
	 */
	private static void printTables(NodeList tables) {
		// size() is an exclusive upper bound: valid indices are 0 .. size() - 1.
		for (int i = 0; i < tables.size(); i++) {
			if (!(tables.elementAt(i) instanceof TableTag)) {
				continue;
			}
			TableTag table = (TableTag) tables.elementAt(i);
			System.out.println("----------------------table  " + i + "--------------------------------");
			for (TableRow row : table.getRows()) {
				for (TableColumn cell : row.getColumns()) {
					System.out.println(cell.getStringText());
				}
			}
		}
	}
}

请解释以下代码<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%> <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> <html> <head> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> <title>注册第一步</title> </head> <body> <% // 设置请求编码方式,防止中文乱码问题 request.setCharacterEncoding("UTF-8"); %> <!-- 查找JavaBean对象,使用请求参数为对象属性赋值 --> <jsp:useBean id="user" class="com.test.chapter07.javabean.UserBean" scope="session" /> <jsp:setProperty property="*" name="user" /> <h2 align="center">用户注册信息确认</h2> <form action="registerSuccess.jsp" method="post"> <table border="1" width="50%" align="center"> <tr> <td>用户名:</td> <td><jsp:getProperty property="username" name="user" /></td> </tr> <tr> <td>密 码:</td> <td><jsp:getProperty property="password" name="user" /></td> </tr> <tr> <td>性别:</td> <td><jsp:getProperty property="sex" name="user" /></td> </tr> <tr> <td>年龄:</td> <td><jsp:getProperty property="age" name="user" /></td> </tr> <tr> <td>提示信息:</td> <td><jsp:getProperty property="tooltip" name="user" /></td> </tr> <tr> <td>提示答案:</td> <td><jsp:getProperty property="answer" name="user" /></td> </tr> <tr> <td>邮箱:</td> <td><jsp:getProperty property="email" name="user" /></td> </tr> <tr> <td>愿意接受信息:</td> <td><jsp:getProperty property="messageChoose" name="user" /></td> </tr> <tr> <td colspan="2" align="center"><input type="submit" value="确认提交"></td> </tr> </table> </form> </body> </html>
05-25
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值