package com.fh.util;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Description: crawls web pages.
 * Author: FH Admin
 * From: tjhelitong.com
 */
public class GetWeb {/**
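 * Gets the code (page source) of the given web page.
 *
 * @param httpUrl the web page address
 * @return the code of the web page as a string
 * @throws IOException if the URL is malformed or its stream cannot be opened or read
 */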
public static String getHtmlCode(String httpUrl) throws IOException {
String content = “”; // 定义字符串content
URL url = new URL(httpUrl); // 生成传入的URL的对象
BufferedReader reader = new BufferedReader(new InputStreamReader(
url.openStream(), “utf-8”));// 获得当前url的字节流(缓冲)
String input;
while