下载器草稿pdf

最新推荐文章于 2023-09-19 11:17:04 发布

静山晚风

最新推荐文章于 2023-09-19 11:17:04 发布

阅读量933

点赞数

分类专栏： Java基础 Poi-Pdf-Excel-Word

本文链接：https://blog.csdn.net/cgwcgw_/article/details/17555727

版权

Java基础同时被 2 个专栏收录

152 篇文章 0 订阅

订阅专栏

Poi-Pdf-Excel-Word

13 篇文章 0 订阅

订阅专栏

package com.blog.test;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Draft downloader: fetches a web page over HTTP(S) and runs a (still
 * placeholder) regular-expression extraction over the downloaded markup.
 *
 * <p>All download methods report failure by returning {@code null}; they never
 * propagate I/O errors to the caller.
 */
public class HtmlParser {
	/**
	 * Extraction pattern used by {@link #getContent(String)}, compiled once.
	 *
	 * <p>NOTE(review): {@code "<*"} means "zero or more '<' characters", so it
	 * matches the empty string at every position, including end of input. The
	 * real extraction rule for this draft is unknown, so the pattern is kept
	 * unchanged - replace it once the intended rule is decided.
	 */
	private static final Pattern CONTENT_PATTERN = Pattern.compile("<*");

	/**
	 * Downloads the document behind {@code url} and returns it as one string.
	 *
	 * <p>The body is read line by line and the lines are concatenated without
	 * line separators.
	 *
	 * @param url    address to fetch; must use a protocol that yields an
	 *               {@link HttpURLConnection}
	 * @param encode name of the charset used to decode the response body
	 * @return the page content, or {@code null} when an argument is
	 *         {@code null}, the connection is not an HTTP connection, the
	 *         response code is -1 or >= 400, or an I/O error occurs
	 */
	public  String getHtmlContent(URL url, String encode) {
		if (url == null || encode == null) {
			return null;
		}

		HttpURLConnection con = null;
		try {
			java.net.URLConnection raw = url.openConnection();
			if (!(raw instanceof HttpURLConnection)) {
				// e.g. file: or jar: URLs - the blind cast used to throw ClassCastException.
				System.out.println("error: " + url.toString());
				return null;
			}
			con = (HttpURLConnection) raw;
			// Identify as an (old) IE browser when downloading.
			con.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");
			con.setConnectTimeout(60000);
			con.setReadTimeout(60000);

			// HTTP status code of the response; -1 means no valid HTTP response.
			int responseCode = con.getResponseCode();
			if (responseCode == -1) {
				System.out.println(url.toString() + " : connection is failure...");
				return null;
			}
			if (responseCode >= 400) { // request failed
				System.out.println("请求失败:get response code: " + responseCode);
				return null;
			}

			StringBuilder contentBuffer = new StringBuilder();
			// try-with-resources closes the stream even when decoding or reading fails.
			try (InputStream inStr = con.getInputStream();
					BufferedReader buffStr = new BufferedReader(new InputStreamReader(inStr, encode))) {
				String str;
				while ((str = buffStr.readLine()) != null) {
					contentBuffer.append(str);
				}
			}
			return contentBuffer.toString();
		} catch (IOException e) {
			e.printStackTrace();
			System.out.println("error: " + url.toString());
			// Previously the buffer was nulled and then dereferenced, causing an NPE.
			return null;
		} finally {
			// con stays null when openConnection() itself failed.
			if (con != null) {
				con.disconnect();
			}
		}
	}

	/**
	 * Downloads the document at the given address.
	 *
	 * <p>If {@code url} does not already start with {@code http://} or
	 * {@code https://} (case-insensitive), {@code http://} is prepended.
	 *
	 * @param url    page address, with or without scheme
	 * @param encode name of the charset used to decode the response body
	 * @return the page content, or {@code null} when {@code url} is
	 *         {@code null}/blank, malformed, or the download fails
	 */
	public  String getHtmlContent(String url, String encode) {
		if (url == null || url.trim().isEmpty()) {
			return null;
		}
		String target = url.trim();
		String lower = target.toLowerCase();
		// Accept https:// as well; it used to be turned into "http://https://...".
		if (!lower.startsWith("http://") && !lower.startsWith("https://")) {
			target = "http://" + target;
		}
		try {
			URL rUrl = new URL(target);
			return getHtmlContent(rUrl, encode);
		} catch (Exception e) {
			e.printStackTrace();
			return null;
		}
	}

	/**
	 * Manual entry point: downloads one hard-coded blog article and prints the
	 * extraction result.
	 *
	 * @param argsp command-line arguments (unused)
	 */
	public static void main(String argsp[]){
		HtmlParser parse = new HtmlParser();
		parse.parse("http://blog.csdn.net/cgwcgw_/article/details/17531323");
	}

	/**
	 * Downloads the page at {@code alitbaseAllUrl} as UTF-8, extracts its
	 * content via {@link #getContent(String)} and prints it to standard output.
	 *
	 * @param alitbaseAllUrl address of the page to process
	 */
	public void parse(String alitbaseAllUrl)
	{
		// May be null when the download failed; getContent tolerates that.
		String allContent = getHtmlContent(alitbaseAllUrl,"utf-8");
		String content = getContent(allContent);
		System.out.println(content);
	}

	/**
	 * Returns the last match of the extraction pattern within
	 * {@code allContent}.
	 *
	 * <p>With the current placeholder pattern the last match is always the
	 * empty match at end of input, so this returns {@code ""} for every input.
	 *
	 * @param allContent full page markup; may be {@code null}
	 * @return the last match, or {@code ""} when {@code allContent} is
	 *         {@code null}
	 */
	public String getContent(String allContent)
	{
		String content = "";
		if (allContent == null) {
			return content;
		}
		Matcher mt = CONTENT_PATTERN.matcher(allContent);
		// Keep only the final match, as the original loop did.
		while (mt.find()) {
			content = mt.group();
		}
		return content;
	}

}

静山晚风

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
打赏
0
评论
下载器草稿pdf

1package com.blog.test;import java.io.BufferedReader;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.net.HttpURLConnection;import java.net.U
复制链接

扫一扫