Java 抓取页面数据保存至数据库后出现乱码,怎么解决?

/**
 * Parses an HTML page, finds the department table by its {@code id}
 * attribute, and resolves every department / link found in that table against
 * the {@code office} database table.
 *
 * <p>Row layout handled by {@link #getOutpatientService(String)}:
 * <ul>
 *   <li>5 cells: cell 0 is a first-level entry, cell 1 a second-level entry,
 *       cells 2-4 hold links belonging to the second-level entry;</li>
 *   <li>4 cells: cell 0 is a second-level entry under the most recent
 *       first-level entry, cells 1-3 hold its links;</li>
 *   <li>3 cells: all cells hold links belonging to the most recent
 *       second-level entry.</li>
 * </ul>
 */
public class subject {

    /**
     * Charset the page is decoded with.
     * NOTE(review): if stored text comes out garbled, confirm this matches the
     * page's real charset and that the JDBC connection uses a compatible one.
     */
    private static final String PAGE_ENCODING = "gbk";

    /** Value of the {@code id} attribute identifying the department table. */
    private static final String DEPARTMENT_TABLE_ID =
            "_ctl0_cphMain_WucOPRegister_Step1_wucStep1_WucOPDepartmentList1_dgOPDepartment";

    /**
     * Fetches and parses {@code url}, then walks every row of the department
     * table and passes each entry to {@link #insertIntoDb(String, String, int)}.
     *
     * <p>Parser failures are reported on standard output / standard error and
     * are not propagated to the caller.
     *
     * @param url location of the page to parse
     */
    public static void getOutpatientService(String url) {
        try {
            Parser myParser = new Parser(url);
            myParser.setEncoding(PAGE_ENCODING);

            // Only table tags are of interest; a single class filter is enough.
            NodeFilter tableFilter = new NodeClassFilter(TableTag.class);
            NodeList nodeList = myParser.parse(tableFilter);

            // Ids of the most recently seen first- and second-level entries.
            // They deliberately survive across rows: 4- and 3-cell rows attach
            // to whatever parent was seen last.
            int class1 = 0;
            int class2 = 0;

            // Strictly less than size(): index size() is past the last element.
            for (int i = 0; i < nodeList.size(); i++) {
                Object node = nodeList.elementAt(i);
                if (!(node instanceof TableTag)) {
                    continue;
                }
                TableTag tag = (TableTag) node;

                String tableid = tag.getAttribute("id");
                if (tableid == null || !tableid.trim().equals(DEPARTMENT_TABLE_ID)) {
                    continue;
                }

                TableRow[] rows = tag.getRows();
                System.out.println(rows.length);

                for (int j = 0; j < rows.length; j++) {
                    TableColumn[] td = rows[j].getColumns();

                    if (td.length == 5) {
                        class1 = insertIntoDb(td[0].toPlainTextString().trim(), "", 0);
                        class2 = insertIntoDb(td[1].toPlainTextString().trim(), "", class1);

                        // Test output: dump the raw bytes of the first cell's
                        // text (platform default charset) to inspect encoding.
                        byte[] s = td[0].toPlainTextString().trim().getBytes();
                        for (byte b : s) {
                            System.err.println(b);
                        }

                        insertLinkCells(td, 2, class2, false);
                    } else if (td.length == 4) {
                        // Second-level entry
                        class2 = insertIntoDb(td[0].toPlainTextString().trim(), "", class1);
                        insertLinkCells(td, 1, class2, false);
                    } else if (td.length == 3) {
                        // Third-level entries only
                        insertLinkCells(td, 0, class2, true);
                    }
                }
            }
        } catch (ParserException e) {
            System.out.println(e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * For every cell from {@code firstCell} to the end of {@code cells}, passes
     * each direct-child link that has an {@code href} to
     * {@link #insertIntoDb(String, String, int)}, using the cell's plain text
     * as the name and {@code parentId} as the sort value.
     *
     * @param cells     all cells of the current row
     * @param firstCell index of the first cell that holds links
     * @param parentId  id of the entry these links belong to
     * @param traceHref when {@code true}, each href is also printed to standard error
     */
    private static void insertLinkCells(TableColumn[] cells, int firstCell, int parentId,
            boolean traceHref) {
        for (int c = firstCell; c < cells.length; c++) {
            NodeList children = cells[c].getChildren();
            if (children == null) {
                // A cell without child nodes has nothing to insert.
                continue;
            }
            for (int k = 0; k < children.size(); k++) {
                Object child = children.elementAt(k);
                if (!(child instanceof LinkTag)) {
                    continue;
                }
                String href = ((LinkTag) child).getAttribute("href");
                if (href == null) {
                    continue;
                }
                // The raw attribute still carries HTML-escaped ampersands;
                // turn them back into plain '&' so the stored URL is usable.
                String decodedHref = href.replace("&amp;", "&");
                if (traceHref) {
                    System.err.println(decodedHref);
                }
                insertIntoDb(cells[c].toPlainTextString().trim(), decodedHref, parentId);
            }
        }
    }

    /**
     * Looks up the {@code officeid} of the row in {@code office} whose
     * {@code officename} and {@code officeurl} match the arguments.
     *
     * <p>Despite its name this method currently performs no INSERT: the insert
     * statement that used to precede the lookup (it wrote name, url and sort
     * into officename, officeurl and officesort) is disabled. Database errors
     * are printed to standard output and result in a return value of 0.
     *
     * @param name department or link text
     * @param url  link target, or an empty string for entries without a link
     * @param sort value the disabled insert stored in {@code officesort};
     *             not used by the lookup
     * @return the {@code officeid} of the last matching row, or 0 when no row
     *         matches or the query fails
     */
    private static int insertIntoDb(String name, String url, int sort) {
        // Look up department information
        Connection connection = null;
        java.sql.PreparedStatement stat = null;
        ResultSet rs = null;
        int maxid = 0;
        try {
            connection = DBConnection.getConnection();

            // Bound parameters instead of string concatenation: the values are
            // scraped from a web page and may contain quotes or SQL fragments.
            stat = connection.prepareStatement(
                    "select officeid from office where officeurl=? and officename=?");
            stat.setString(1, url);
            stat.setString(2, name);
            rs = stat.executeQuery();

            while (rs.next()) {
                maxid = rs.getInt(1);
            }
        } catch (Exception e) {
            System.out.println(e.getMessage());
        } finally {
            // Release JDBC resources on every path, innermost first.
            if (rs != null) {
                try {
                    rs.close();
                } catch (Exception e) {
                    System.out.println(e.getMessage());
                }
            }
            if (stat != null) {
                try {
                    stat.close();
                } catch (Exception e) {
                    System.out.println(e.getMessage());
                }
            }
            if (connection != null) {
                try {
                    DBConnection.closeConnection(connection);
                } catch (Exception e) {
                    System.out.println(e.getMessage());
                }
            }
        }
        return maxid;
    }

}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值