Java读取网页数据

package test;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;

import com.sun.xml.internal.txw2.Document;

/**
 * Downloads the whois.asia lookup page for a query term, strips its HTML markup and
 * returns the text found between the {@code "CED"} and {@code "Nameservers"} markers.
 */
public class tools {

    /** Lookup URL; the URL-encoded query term is appended to it. */
    private static final String WHOIS_URL =
            "http://whois.asia/cgi-bin/whois.cgi?whois_query_field=";

    /** Charset name used both for URL-encoding the query and for decoding the response. */
    private static final String UTF_8 = "UTF-8";

    /**
     * Matches a whole script element. {@code (?s)} lets {@code .} span line breaks; the
     * plain lazy quantifier avoids the per-character recursion of {@code (.|\n)+?}, which
     * can overflow the stack on long script blocks.
     */
    private static final String SCRIPT_BLOCK = "(?s)<script.+?</script>";

    /** Matches any remaining tag, including tags that span several lines. */
    private static final String HTML_TAG = "(?s)<.+?>";

    /** Marker at which the extracted section starts (inclusive). */
    private static final String START_MARKER = "CED";

    /** Marker at which the extracted section ends (exclusive). */
    private static final String END_MARKER = "Nameservers";

    /**
     * Demo entry point: looks up {@code "baidu"} and prints the extracted text.
     *
     * @param args unused
     * @throws IOException if the page cannot be fetched or read
     */
    public static void main(String[] args) throws IOException {
        System.out.println(new tools().getCEDContact("baidu"));
    }

    /**
     * Strips markup from the downloaded page and cuts out the section of interest.
     *
     * <p>Script elements and tags are replaced by a space, then every space character is
     * removed. The text is split on newlines and re-joined with each line terminated by
     * a newline (trailing empty lines are dropped by {@link String#split(String)}).
     *
     * @param buffer page content with lines separated by {@code '\n'}
     * @return the text from the first {@code "CED"} up to (not including) the next
     *         {@code "Nameservers"} after it; the whole cleaned text if that pair of
     *         markers is not present
     */
    private String splitString(StringBuffer buffer) {
        String text = buffer.toString()
                .replaceAll(SCRIPT_BLOCK, " ")
                .replaceAll(HTML_TAG, " ")
                .replaceAll(" ", "");

        StringBuilder normalized = new StringBuilder(text.length() + 1);
        for (String line : text.split("\n")) {
            normalized.append(line).append('\n');
        }
        String result = normalized.toString();

        // indexOf returns -1 when absent, so index 0 is a valid hit. The end marker is
        // searched from the start marker onwards so that start <= end always holds.
        int start = result.indexOf(START_MARKER);
        int end = start < 0 ? -1 : result.indexOf(END_MARKER, start);
        if (end >= 0) {
            return result.substring(start, end);
        }
        return result;
    }

    /**
     * Opens the given URL and wraps its response body in a {@link BufferedReader}.
     *
     * <p>Failures are propagated to the caller instead of being masked by a
     * {@code null} return value.
     *
     * @param urlString address to open
     * @return a reader over the response body; the caller must close it
     * @throws IOException if the URL is malformed or the connection cannot be opened
     */
    private BufferedReader getBuffer(String urlString) throws IOException {
        URLConnection conn = new URL(urlString).openConnection();
        // NOTE(review): assumes the server answers in UTF-8 - confirm. Naming the charset
        // keeps decoding independent of the platform default.
        return new BufferedReader(new InputStreamReader(conn.getInputStream(), UTF_8));
    }

    /**
     * Queries whois.asia for the given term and returns the extracted text section.
     *
     * @param parm query term; it is URL-encoded before being appended to the lookup URL
     * @return the cleaned page text as produced by {@code splitString}
     * @throws IOException if the page cannot be fetched or read
     * @throws IllegalArgumentException if {@code parm} is {@code null}
     */
    protected String getCEDContact(String parm) throws IOException {
        if (parm == null) {
            throw new IllegalArgumentException("parm must not be null");
        }
        String urlString = WHOIS_URL + URLEncoder.encode(parm, UTF_8);

        StringBuffer page = new StringBuffer();
        BufferedReader reader = getBuffer(urlString);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                // A real newline is used as separator; page text cannot contain one
                // because readLine() strips line terminators.
                page.append(line).append('\n');
            }
        } finally {
            // Close the reader even when readLine() fails.
            reader.close();
        }
        return splitString(page);
    }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值