工具类

import java.util.ArrayList;
import java.util.List;


import org.apache.commons.lang.StringUtils;


public class RegUtil {


static final String entityNames[] = new String[] { " ", "<",
">", "&", "¢", "£", "¥", "€", "§",
"©", "®", "™", """, "'" };


static final String entityNumber[] = new String[] { " ", "<",
">", "&", "¢", "£", "¥", "€",
"§", "©", "®", "™", """, "'" };


static final String result[] = new String[] { " ", "<", ">", "&", "¢", "£",
"¥", "€", "§", "©", "®", "™", "\"", "'" };


/**
* 处理html 转义实体.

* @param str
* @return
*/
public static String htmlEntities(String str) {
String text = str;
text = StringUtils.replaceEachRepeatedly(text, entityNames, result);
text = StringUtils.replaceEachRepeatedly(text, entityNumber, result);
return text;
}


/**
* 获取匹配字符串

* @param str
*            被解析内容
* @param regex
*            解析表达式,要唯一标签
*/
public static String getParseStr(String str, String parseStr) {
if (StringUtils.isEmpty(str)) {
return str;
}
String result = null;
final java.util.regex.Pattern pa = java.util.regex.Pattern.compile(
parseStr, java.util.regex.Pattern.DOTALL);
final java.util.regex.Matcher ma = pa.matcher(str);
while (ma.find()) {
result = ma.group(1);
if(result==null){
String value = ma.group();
int startlen = parseStr.indexOf("(");
int endlen = parseStr.length()-parseStr.indexOf(")")-1;
int length = value.length();
value = value.substring(startlen,length-endlen);
value = value.replace("<![CDATA[", "").replace("]]>", "");
result = trimRN(trimLeft(value.trim()));
}
}
return result;
}


/**
* 获取文本范围内容

* @param text
*            被解析文件
* @param parseRegex
*            解析表达式
* @return
*/
public static String getParseStrContent(String text, String parseRegex) {
if (StringUtils.isEmpty(text)) {
return text;
}


String reulst = null;
final java.util.regex.Pattern pa = java.util.regex.Pattern.compile(
parseRegex, java.util.regex.Pattern.DOTALL);
final java.util.regex.Matcher ma = pa.matcher(text);
while (ma.find()) {
reulst = ma.group(1);
}
return reulst;
}

public static String getParseStrCaseInse(String text, String parseRegex) {
if (StringUtils.isEmpty(text)) {
return text;
}


String reulst = null;
final java.util.regex.Pattern pa = java.util.regex.Pattern.compile(
parseRegex, java.util.regex.Pattern.CASE_INSENSITIVE);
final java.util.regex.Matcher ma = pa.matcher(text);
if (ma.find()) {
reulst = ma.group(1);
}
return reulst;
}


/**
* 获取全部匹配

* @param str
*            被解析内容
* @param parseRegex
*            解析表达式
*/
public static List<String> getParseStrList(String str, String parseRegex) {
return getParseStrList(str, parseRegex, true);
}

/**
* 获取全部匹配

* @param str
*            被解析内容
* @param parseRegex
*            解析表达式
*/
public static List<String> getParseStrList(String str, String parseRegex, boolean trimTag) {
List<String> contentUrlList = new ArrayList<String>();
if (StringUtils.isEmpty(str)) {
return contentUrlList;
}
final java.util.regex.Pattern pa = java.util.regex.Pattern.compile(
parseRegex, java.util.regex.Pattern.DOTALL);
final java.util.regex.Matcher ma = pa.matcher(str);


while (ma.find()) {
String value = ma.group();
if(trimTag){
int startlen = parseRegex.indexOf("(");
int endlen = parseRegex.length()-parseRegex.indexOf(")")-1;
int length = value.length();
value = value.substring(startlen,length-endlen);
}
contentUrlList.add(value);
}
return contentUrlList;
}


/**
* 删除字符串中所有HTML标记

* @param input
* @return
*/
public static String trimHtml(String str) {
if (str == null) {
return "";
}
str = str.replaceAll("&lt;", "<").replaceAll("&gt;", ">");
str = str.replaceAll("<!\\[CDATA\\[", "").replaceAll("\\]\\]>", "");
str = str.replaceAll("</?[^>]+>", "").replace("&nbsp;", " ");
return str;
}


/**
* 左右空格都去掉

* @param str
* @return
*/
public static String trim(String str) {
if (str == null || str.equals("")) {
return str;
} else {
return str.replaceAll("^[ ]+|[ ]+$", "").replaceAll("^[ ]+|[ ]+$",
"");
}
}


/**
* 去左空格

* @param str
* @return
*/
public static String trimLeft(String str) {
if (str == null || str.equals("")) {
return str;
} else {
return str.replaceAll("^[ ]+", "").replaceAll("^[ ]+", "");
}
}


/**
* 删除字符串中所有空格

* @param str
* @return
*/
public static String trimAll(String str) {
if (str == null || str.equals("")) {
return str;
} else {
return str.replaceAll("\\s", "");
}
}


public static String trimRight(String str) {
if (str == null || str.equals("")) {
return str;
} else {
return str.replaceAll("[ ]+$", "").replaceAll("[ ]+$", "");
}
}


public static String replaceFirst(String str, String regex) {
if (StringUtils.isEmpty(str)) {
return str;
}


if (StringUtils.isEmpty(regex)) {
return str;
}
String[] regexs = regex.split("[|]");
if (regexs.length == 2) {
str = str.replaceFirst(regexs[0], regexs[1]);
}
return str;
}


public static String replaceUrl(String downLoadUrl, String content,
String replace) {
StringBuilder result = new StringBuilder();


if (StringUtils.isEmpty(downLoadUrl)) {
return content;
}
if (StringUtils.isEmpty(replace)) {
return content;
}


String[] urlList = content.split(",");


if (urlList.length > 0) {
for (int i = 0; i < urlList.length; i++) {
result.append(downLoadUrl.replace(replace, urlList[i])).append(
",");
}
result.deleteCharAt(result.length() - 1);
} else {
result.append(downLoadUrl.replace(replace, content));
}


return result.toString();
}


public static String replaceAll(String str, String regex) {
if (StringUtils.isEmpty(str)) {
return str;
}


if (StringUtils.isEmpty(regex)) {
return str;
}


String[] regexs = regex.split("[|]");


if (regexs.length == 1) {
str = str.replaceAll(regexs[0], "");
} else if (regexs.length == 2) {
str = str.replaceAll(regexs[0], regexs[1]);
}
return str;
}


/**
* 删除字符串中所有换行

* @param str
* @return
*/
public static String trimRN(String str) {
if (str == null || str.equals("")) {
return str;
} else {
return str.replaceAll("\r", " ").replaceAll("\n", " ").replaceAll(
"\\r", " ").replaceAll("\\n", " ");
}
}

public static String formatFileName(String filename){
if(filename==null || "".equals(filename.trim())) return filename;
return filename.replaceAll("\\\\|/|:|\\*|\\?|\"|<|>|\\|", "");
}

public static void main(String[] args) {

String s = "aa\\bb / cc:dd *ee ?ff \"gg <hh >ii |jj";
System.out.println(s);
System.out.println(formatFileName(s));
//<meta charset="gbk"/>
//<meta charset="utf-8">
//<meta charset="utf-8">
//<meta charset="UTF-8">
//<meta http-equiv="content-type" content="text/html; charset=gb2312" />
String cont = "<description><![CDATA[<a target=\"_blank\" href=\"http://finance.sina.com.cn/money/bank/bank_hydt/20150422/005922012276.shtml\"><img border=\"0\" src=\"http://t1.baidu.com/it/u=http%3A%2F%2Fi3.sinaimg.cn%2Fcj%2Fcr%2F2015%2F0422%2F2968355430.jpg&fm=30\"></a><br>21世纪经济报道从多个渠道获悉,农发行的注资将由财政部负责,初步拟定的方案是以逐年返还农发行上缴的税收这一方式,为该行增加注册资本金。注资量和前述两家银行相当,约为1500亿元。   本报记者 李玉...]]></description>";
String reg = "<description>.*<!\\[CDATA\\[(.*?)\\]\\]>.*</description>";
System.out.println(RegUtil.trimHtml(RegUtil.getParseStr(cont, reg)));
}

}



import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashSet;
import java.util.Set;


import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.commons.configuration.reloading.FileChangedReloadingStrategy;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;


public class Config {


private static Logger logger = Logger.getLogger(Config.class);
private static PropertiesConfiguration configuration;
public static Set<String> imgfliter = new HashSet<String>();

/**
* 初始化加载配置
* @throws ConfigurationException 
* @description: <方法描述>
*/
public static void init() throws IOException {

String configPath = "./";
        File file = new File(configPath);
        String absPath = file.getAbsolutePath();
        absPath = absPath.substring(0,absPath.length()-2);
        String path = absPath + "/conf/config.properties";
        PropertyConfigurator.configure(absPath + "/conf/log4j.properties");
        
try {
configuration = new PropertiesConfiguration(path);
} catch (ConfigurationException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}


        logger.info("成功加载:" + path + " 配置文件");
        configuration.setReloadingStrategy(new FileChangedReloadingStrategy());
}

public static void init(String homePath) throws IOException {

        String path = homePath + "/conf/config.properties";
        PropertyConfigurator.configure(homePath + "/conf/log4j.properties");
        
try {
configuration = new PropertiesConfiguration(path);
} catch (ConfigurationException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}


        logger.info("成功加载:" + path + " 配置文件");
        configuration.setReloadingStrategy(new FileChangedReloadingStrategy());
}

public static void readImgFliter(String filePath)
    {  
    InputStreamReader isr = null;
    BufferedReader br = null;
   
    try{
    File file = new File(filePath);
    isr = new InputStreamReader(new FileInputStream(file), "UTF-8");
br = new BufferedReader(isr);


while (br.ready()) {
String line = br.readLine();
imgfliter.add(line);
}

br.close();
isr.close();
    }
    catch(Exception e){
    e.printStackTrace();
    }
    finally{
    try{
    br.close();
    isr.close();
    }
    catch(Exception ee){}
    }
    }

public static String getString(String optname) {
return configuration.getString(optname);
}

public static int getInt(String optname) {
return configuration.getInt(optname);
}

public static int getInt(String optname,int defaultValue) {
return configuration.getInt(optname,defaultValue);
}
}



/**
 * Holder for the outcome of a captcha ("check code") download: the image
 * bytes, the cookie that came with it, and a status flag.
 */
public class CheckCode {

    /** Captcha image bytes. */
    byte[] imgs;

    /** Cookie that belongs to the fetched captcha. */
    String cookie;

    /** Fetch status: "0" = success, "1" = no captcha, "2" = download failed. */
    String status = "1";

    /** @return the status flag; "1" until changed */
    public String getStatus() {
        return this.status;
    }

    /** @param status the new status flag */
    public void setStatus(String status) {
        this.status = status;
    }

    /** @return the stored image array itself (not a copy) */
    public byte[] getImgs() {
        return this.imgs;
    }

    /** @param imgs the image array to store (kept by reference) */
    public void setImgs(byte[] imgs) {
        this.imgs = imgs;
    }

    /** @return the cookie string, or null if none was set */
    public String getCookie() {
        return this.cookie;
    }

    /** @param cookie the cookie string to store */
    public void setCookie(String cookie) {
        this.cookie = cookie;
    }
}



public class Downloader extends Connector {


    private static Logger logger = Logger.getLogger(Downloader.class);
    public static Map<String, String> cookies = new HashMap<String, String>();


    private int maxSize = 1024 * 1024 * 2;
    private int bufferSize = 1024 * 4;


    public Downloader() {
    }


    public String download(String url, String para, String refer, String cookie) {


        String content = "";
        try {
            URL u = new URL(url);
            HttpURLConnection conn = (HttpURLConnection) getConnection(u);


            conn.setDoOutput(true);
            conn.setDoInput(true);
            conn.setRequestMethod("POST");
            conn.setUseCaches(false);
            conn.setInstanceFollowRedirects(true);
            conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
            if (refer != null) {
                conn.setRequestProperty("Referer", refer);
                conn.setRequestProperty("Host", new URL(refer).getHost());
            }
            String cookiepath = u.getHost();
            if (cookie != null) {
                conn.setRequestProperty("Cookie", cookie);
            } else {
                cookie = cookies.get(cookiepath);
                if (cookie != null)
                    conn.setRequestProperty("Cookie", cookie);
            }
            conn.connect();


            DataOutputStream out = new DataOutputStream(conn.getOutputStream());
            out.writeBytes(para);
            out.flush();
            out.close();


            String contentencoding = conn.getHeaderField("Content-Encoding");
            InputStream is = null;
            if ("gzip".equals(contentencoding))
                is = new GZIPInputStream(conn.getInputStream());
            else
                is = conn.getInputStream();


            byte[] buff = new byte[maxSize + bufferSize];
            int res = 0;
            int readed = 0;
            while ((res = is.read(buff, readed, bufferSize)) != -1) {
                readed += res;
                if (readed >= maxSize)
                    break;
            }


            if (cookie == null) {
                String cookieVal = "";
                String key = null;
                for (int i = 1; (key = conn.getHeaderFieldKey(i)) != null; i++) {
                    if (key.equalsIgnoreCase("set-cookie")) {
                        if (!"".equals(cookieVal))
                            cookieVal += ";";
                        cookieVal += conn.getHeaderField(i).split(";")[0];
                    }
                }
                if (cookieVal != null && !"".equals(cookieVal)) {
                    cookies.put(cookiepath, cookieVal);
                }
            }


            String encoding = null;
            String contenttype = conn.getHeaderField("Content-Type");
            encoding = Chardet.getCharSetByHead(url, contenttype);
            if (encoding == null) {
                String tmp = new String(buff, 0, 1024 * 4, "gb2312");
                encoding = Chardet.getCharSetByContent(url, tmp);
            }
            if (encoding == null) {
                content = new String(buff, 0, readed, "gb2312");
                logger.info("未识别网页编码:" + url);
            } else {
                content = new String(buff, 0, readed, encoding);
            }
            close(is);
            conn.disconnect();
        } catch (Exception e) {
            logger.error("下载url:" + url + "出错!", e);
        }


        return content;
    }


    public String download_get(String url, String para, String refer, String cookie) throws IOException {
        String content = null;
        try {
            String getURL = String.format("%s?%s", url, para);
            URL u = new URL(getURL);
            HttpURLConnection conn = (HttpURLConnection) getConnection(u);
            conn.setDoInput(true);
            conn.setDoOutput(true);


            if (refer != null){
                conn.setRequestProperty("Referer", refer);
                conn.setRequestProperty("Host", new URL(refer).getHost());
            }
            
String cookiepath = u.getHost();
if(cookie!=null){
conn.setRequestProperty("Cookie",cookie);
}
else{
cookie = cookies.get(cookiepath);
if(cookie!=null)
conn.setRequestProperty("Cookie",cookie);
}

            conn.connect();


            String contentencoding = conn.getHeaderField("Content-Encoding");
            InputStream is = null;
            if ("gzip".equals(contentencoding))
                is = new GZIPInputStream(conn.getInputStream());
            else
                is = conn.getInputStream();


            byte[] buff = new byte[maxSize + bufferSize];
            int res = 0;
            int readed = 0;
            while ((res = is.read(buff, readed, bufferSize)) != -1) {
                readed += res;
                if (readed >= maxSize)
                    break;
            }


            if (cookie == null) {
                String cookieVal = "";
                String key = null;
                for (int i = 1; (key = conn.getHeaderFieldKey(i)) != null; i++) {
                    if (key.equalsIgnoreCase("set-cookie")) {
                        if (!"".equals(cookieVal))
                            cookieVal += ";";
                        cookieVal += conn.getHeaderField(i).split(";")[0];
                    }
                }
                if (cookieVal != null && !"".equals(cookieVal)) {
                    cookies.put(u.getHost(), cookieVal);
                }
            }


            String encoding = null;
            String contenttype = conn.getHeaderField("Content-Type");
            encoding = Chardet.getCharSetByHead(url, contenttype);
            if (encoding == null) {
                String tmp = new String(buff, 0, 1024 * 4, "gb2312");
                encoding = Chardet.getCharSetByContent(url, tmp);
            }
            if (encoding == null) {
                content = new String(buff, 0, readed, "gb2312");
                logger.info("未识别网页编码:" + url);
            } else {
                content = new String(buff, 0, readed, encoding);
            }
            close(is);
            conn.disconnect();
        } catch (Exception e) {
            logger.error("下载url:" + url + "出错!", e);
        }


        return content;
    }


    public CheckCode downloadImg(String urlStr, String refer) {


        CheckCode checkcode = new CheckCode();
        try {
            URL url = new URL(urlStr);
            URLConnection conn = getConnection(url);
            conn.setDoOutput(true);
            conn.setConnectTimeout(20000);
            conn.setReadTimeout(10000);
            
            if (refer != null) {
                conn.setRequestProperty("Referer", refer);
                conn.setRequestProperty("Host", new URL(refer).getHost());
            }


            conn.connect();


            String contentencoding = conn.getHeaderField("Content-Encoding");
            String cookieVal = "";
            String key = null;
            for (int i = 1; (key = conn.getHeaderFieldKey(i)) != null; i++) {
                if (key.equalsIgnoreCase("set-cookie")) {
                    if (!"".equals(cookieVal))
                        cookieVal += ";";
                    cookieVal += conn.getHeaderField(i).split(";")[0];
                }
            }
            if (cookieVal != null && !"".equals(cookieVal)) {
                cookies.put(url.getHost(), cookieVal);
                checkcode.setCookie(cookieVal);
            }


            InputStream is = null;
            if ("gzip".equals(contentencoding))
                is = new GZIPInputStream(conn.getInputStream());
            else
                is = conn.getInputStream();


            ByteArrayOutputStream os = new ByteArrayOutputStream();
            byte[] buff = new byte[bufferSize];
            int res;
            while ((res = is.read(buff)) != -1) {
                os.write(buff, 0, res);
            }
            checkcode.setImgs(os.toByteArray());
            checkcode.setStatus("0");
            os.close();
            close(is);
        } catch (Exception e) {
            checkcode.setStatus("2");
            logger.error("下载图片:" + urlStr + "出错!");
        }


        return checkcode;
    }


    private void close(InputStream is) {
        try {
            is.close();
        } catch (IOException e) {
        }
    }


    public static void main(String[] args) throws InterruptedException, IOException {
        Config.init();


        Downloader imgdl = new Downloader();
        for (int i = 0; i < 1; i++) {
            imgdl.downloadImg("http://y0.ifengimg.com/a/2015_18/e8c351adc97fbb7.bmp", "");
        }
    }
}


/**
 * Client for the captcha recognition service whose address is configured
 * under the {@code checkcode_url} key.
 */
public class CheckCodeUtil {


    private static Logger logger = Logger.getLogger(CheckCodeUtil.class);

    /**
     * Manual smoke test: requests recognition of one captcha image and prints
     * the {@code checkcode} field of the answer.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            JSONObject job = getCheckCode("http://www.stc.gov.cn:8082/szwsjj_web/ImgServlet.action?rnd=0.12", null, "01");
            if (job != null) {
                System.out.println(job.get("checkcode"));
            }
        } catch (Exception e) {
            logger.error("Check code smoke test failed", e);
        }
    }


    /**
     * POSTs the image URL, image type and referer to the recognition service
     * and parses the response as JSON. A single attempt is made.
     *
     * @param imgurl  URL of the captcha image
     * @param url     referer to pass along; null is sent as an empty value
     * @param imgtype captcha type code understood by the service
     * @return the parsed response, or null when the request fails or the
     *         response is empty (failures are logged)
     */
    public static JSONObject getCheckCode(String imgurl, String url, String imgtype) {

        HttpURLConnection con = null;
        JSONObject job = null;
        int tryCount = 1;
        int code = -1;
        while (tryCount > 0 && (job == null || code != 0)) {
            BufferedReader br = null;
            try {
                URL u = new URL(Config.getString("checkcode_url"));

                con = (HttpURLConnection) u.openConnection();
                con.setRequestMethod("POST");
                con.setDoOutput(true);
                con.setDoInput(true);
                con.setUseCaches(false);
                con.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");

                // URLEncoder rejects null, so a missing referer is sent empty.
                String refer = url == null ? "" : url;
                // NOTE(review): the "+" after "url=" decodes to a leading space
                // on the server side - confirm whether it is intentional.
                String para = "url=+" + URLEncoder.encode(imgurl, Constants.CHART_SET) + "&type=" + imgtype;
                para += "&refer=" + URLEncoder.encode(refer, Constants.CHART_SET);

                OutputStreamWriter osw = new OutputStreamWriter(con.getOutputStream(), Constants.CHART_SET);
                try {
                    osw.write(para);
                    osw.flush();
                } finally {
                    osw.close();
                }

                // Read the whole response; line breaks are dropped.
                StringBuilder buffer = new StringBuilder();
                br = new BufferedReader(new InputStreamReader(con.getInputStream(), Constants.CHART_SET));
                String temp;
                while ((temp = br.readLine()) != null) {
                    buffer.append(temp);
                }
                if (!"".equals(buffer.toString())) {
                    job = new JSONObject(buffer.toString());
                    code = job.getInt("code");
                }
            } catch (Exception e) {
                logger.error("获取验证码失败!", e);
            } finally {
                tryCount--;
                if (br != null) {
                    try {
                        br.close();
                    } catch (Exception ignored) {
                        // Nothing useful can be done if closing fails.
                    }
                }
                if (con != null) {
                    con.disconnect();
                }
            }
        }
        return job;
    }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值