import java.util.List;
import org.apache.commons.lang.StringUtils;
public class RegUtil {
static final String entityNames[] = new String[] { " ", "<",
">", "&", "¢", "£", "¥", "€", "§",
"©", "®", "™", """, "'" };
static final String entityNumber[] = new String[] { " ", "<",
">", "&", "¢", "£", "¥", "€",
"§", "©", "®", "™", """, "'" };
static final String result[] = new String[] { " ", "<", ">", "&", "¢", "£",
"¥", "€", "§", "©", "®", "™", "\"", "'" };
/**
* 处理html 转义实体.
*
* @param str
* @return
*/
public static String htmlEntities(String str) {
String text = str;
text = StringUtils.replaceEachRepeatedly(text, entityNames, result);
text = StringUtils.replaceEachRepeatedly(text, entityNumber, result);
return text;
}
/**
* 获取匹配字符串
*
* @param str
* 被解析内容
* @param regex
* 解析表达式,要唯一标签
*/
public static String getParseStr(String str, String parseStr) {
if (StringUtils.isEmpty(str)) {
return str;
}
String result = null;
final java.util.regex.Pattern pa = java.util.regex.Pattern.compile(
parseStr, java.util.regex.Pattern.DOTALL);
final java.util.regex.Matcher ma = pa.matcher(str);
while (ma.find()) {
result = ma.group(1);
if(result==null){
String value = ma.group();
int startlen = parseStr.indexOf("(");
int endlen = parseStr.length()-parseStr.indexOf(")")-1;
int length = value.length();
value = value.substring(startlen,length-endlen);
value = value.replace("<![CDATA[", "").replace("]]>", "");
result = trimRN(trimLeft(value.trim()));
}
}
return result;
}
/**
* 获取文本范围内容
*
* @param text
* 被解析文件
* @param parseRegex
* 解析表达式
* @return
*/
public static String getParseStrContent(String text, String parseRegex) {
if (StringUtils.isEmpty(text)) {
return text;
}
String reulst = null;
final java.util.regex.Pattern pa = java.util.regex.Pattern.compile(
parseRegex, java.util.regex.Pattern.DOTALL);
final java.util.regex.Matcher ma = pa.matcher(text);
while (ma.find()) {
reulst = ma.group(1);
}
return reulst;
}
public static String getParseStrCaseInse(String text, String parseRegex) {
if (StringUtils.isEmpty(text)) {
return text;
}
String reulst = null;
final java.util.regex.Pattern pa = java.util.regex.Pattern.compile(
parseRegex, java.util.regex.Pattern.CASE_INSENSITIVE);
final java.util.regex.Matcher ma = pa.matcher(text);
if (ma.find()) {
reulst = ma.group(1);
}
return reulst;
}
/**
* 获取全部匹配
*
* @param str
* 被解析内容
* @param parseRegex
* 解析表达式
*/
public static List<String> getParseStrList(String str, String parseRegex) {
return getParseStrList(str, parseRegex, true);
}
/**
* 获取全部匹配
*
* @param str
* 被解析内容
* @param parseRegex
* 解析表达式
*/
public static List<String> getParseStrList(String str, String parseRegex, boolean trimTag) {
List<String> contentUrlList = new ArrayList<String>();
if (StringUtils.isEmpty(str)) {
return contentUrlList;
}
final java.util.regex.Pattern pa = java.util.regex.Pattern.compile(
parseRegex, java.util.regex.Pattern.DOTALL);
final java.util.regex.Matcher ma = pa.matcher(str);
while (ma.find()) {
String value = ma.group();
if(trimTag){
int startlen = parseRegex.indexOf("(");
int endlen = parseRegex.length()-parseRegex.indexOf(")")-1;
int length = value.length();
value = value.substring(startlen,length-endlen);
}
contentUrlList.add(value);
}
return contentUrlList;
}
/**
* 删除字符串中所有HTML标记
*
* @param input
* @return
*/
public static String trimHtml(String str) {
if (str == null) {
return "";
}
str = str.replaceAll("<", "<").replaceAll(">", ">");
str = str.replaceAll("<!\\[CDATA\\[", "").replaceAll("\\]\\]>", "");
str = str.replaceAll("</?[^>]+>", "").replace(" ", " ");
return str;
}
/**
* 左右空格都去掉
*
* @param str
* @return
*/
public static String trim(String str) {
if (str == null || str.equals("")) {
return str;
} else {
return str.replaceAll("^[ ]+|[ ]+$", "").replaceAll("^[ ]+|[ ]+$",
"");
}
}
/**
* 去左空格
*
* @param str
* @return
*/
public static String trimLeft(String str) {
if (str == null || str.equals("")) {
return str;
} else {
return str.replaceAll("^[ ]+", "").replaceAll("^[ ]+", "");
}
}
/**
* 删除字符串中所有空格
*
* @param str
* @return
*/
public static String trimAll(String str) {
if (str == null || str.equals("")) {
return str;
} else {
return str.replaceAll("\\s", "");
}
}
public static String trimRight(String str) {
if (str == null || str.equals("")) {
return str;
} else {
return str.replaceAll("[ ]+$", "").replaceAll("[ ]+$", "");
}
}
public static String replaceFirst(String str, String regex) {
if (StringUtils.isEmpty(str)) {
return str;
}
if (StringUtils.isEmpty(regex)) {
return str;
}
String[] regexs = regex.split("[|]");
if (regexs.length == 2) {
str = str.replaceFirst(regexs[0], regexs[1]);
}
return str;
}
public static String replaceUrl(String downLoadUrl, String content,
String replace) {
StringBuilder result = new StringBuilder();
if (StringUtils.isEmpty(downLoadUrl)) {
return content;
}
if (StringUtils.isEmpty(replace)) {
return content;
}
String[] urlList = content.split(",");
if (urlList.length > 0) {
for (int i = 0; i < urlList.length; i++) {
result.append(downLoadUrl.replace(replace, urlList[i])).append(
",");
}
result.deleteCharAt(result.length() - 1);
} else {
result.append(downLoadUrl.replace(replace, content));
}
return result.toString();
}
public static String replaceAll(String str, String regex) {
if (StringUtils.isEmpty(str)) {
return str;
}
if (StringUtils.isEmpty(regex)) {
return str;
}
String[] regexs = regex.split("[|]");
if (regexs.length == 1) {
str = str.replaceAll(regexs[0], "");
} else if (regexs.length == 2) {
str = str.replaceAll(regexs[0], regexs[1]);
}
return str;
}
/**
* 删除字符串中所有换行
*
* @param str
* @return
*/
public static String trimRN(String str) {
if (str == null || str.equals("")) {
return str;
} else {
return str.replaceAll("\r", " ").replaceAll("\n", " ").replaceAll(
"\\r", " ").replaceAll("\\n", " ");
}
}
public static String formatFileName(String filename){
if(filename==null || "".equals(filename.trim())) return filename;
return filename.replaceAll("\\\\|/|:|\\*|\\?|\"|<|>|\\|", "");
}
public static void main(String[] args) {
String s = "aa\\bb / cc:dd *ee ?ff \"gg <hh >ii |jj";
System.out.println(s);
System.out.println(formatFileName(s));
//<meta charset="gbk"/>
//<meta charset="utf-8">
//<meta charset="utf-8">
//<meta charset="UTF-8">
//<meta http-equiv="content-type" content="text/html; charset=gb2312" />
String cont = "<description><![CDATA[<a target=\"_blank\" href=\"http://finance.sina.com.cn/money/bank/bank_hydt/20150422/005922012276.shtml\"><img border=\"0\" src=\"http://t1.baidu.com/it/u=http%3A%2F%2Fi3.sinaimg.cn%2Fcj%2Fcr%2F2015%2F0422%2F2968355430.jpg&fm=30\"></a><br>21世纪经济报道从多个渠道获悉,农发行的注资将由财政部负责,初步拟定的方案是以逐年返还农发行上缴的税收这一方式,为该行增加注册资本金。注资量和前述两家银行相当,约为1500亿元。 本报记者 李玉...]]></description>";
String reg = "<description>.*<!\\[CDATA\\[(.*?)\\]\\]>.*</description>";
System.out.println(RegUtil.trimHtml(RegUtil.getParseStr(cont, reg)));
}
}
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.commons.configuration.reloading.FileChangedReloadingStrategy;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
/**
 * Process-wide access to {@code conf/config.properties} and to the image
 * filter list. One of the {@code init} methods must succeed before the
 * getters are used.
 */
public class Config {
    private static final Logger logger = Logger.getLogger(Config.class);
    private static PropertiesConfiguration configuration;
    /** Lines loaded by {@link #readImgFliter(String)}. */
    public static Set<String> imgfliter = new HashSet<String>();

    /**
     * Loads the configuration relative to the current working directory.
     *
     * @throws IOException if the properties file cannot be loaded
     */
    public static void init() throws IOException {
        String absPath = new File("./").getAbsolutePath();
        // Drop the last two characters, i.e. the separator and "." that
        // getAbsolutePath() leaves at the end of the working directory.
        absPath = absPath.substring(0, absPath.length() - 2);
        init(absPath);
    }

    /**
     * Configures log4j from {@code homePath/conf/log4j.properties} and loads
     * {@code homePath/conf/config.properties} with a file-changed reloading
     * strategy.
     *
     * @param homePath directory that contains the {@code conf} folder
     * @throws IOException if the properties file cannot be loaded; a
     *         previously loaded configuration stays in place in that case
     */
    public static void init(String homePath) throws IOException {
        String path = homePath + "/conf/config.properties";
        PropertyConfigurator.configure(homePath + "/conf/log4j.properties");
        PropertiesConfiguration loaded;
        try {
            loaded = new PropertiesConfiguration(path);
        } catch (ConfigurationException e) {
            // Surface the failure instead of logging success and hitting a
            // NullPointerException on the next line.
            throw new IOException("加载配置文件失败:" + path, e);
        }
        logger.info("成功加载:" + path + " 配置文件");
        loaded.setReloadingStrategy(new FileChangedReloadingStrategy());
        configuration = loaded;
    }

    /**
     * Reads a UTF-8 text file and adds each of its lines to
     * {@link #imgfliter}. Failures are logged and otherwise ignored.
     *
     * @param filePath path of the filter file
     */
    public static void readImgFliter(String filePath) {
        FileInputStream fis = null;
        BufferedReader br = null;
        try {
            fis = new FileInputStream(new File(filePath));
            br = new BufferedReader(new InputStreamReader(fis, "UTF-8"));
            String line;
            // readLine() returning null is the end-of-file signal
            while ((line = br.readLine()) != null) {
                imgfliter.add(line);
            }
        } catch (Exception e) {
            logger.error("读取图片过滤文件失败:" + filePath, e);
        } finally {
            // Closing the reader also closes the wrapped stream; fall back
            // to the raw stream when the reader was never created.
            java.io.Closeable toClose = (br != null) ? br : fis;
            if (toClose != null) {
                try {
                    toClose.close();
                } catch (IOException ignored) {
                    // nothing useful can be done when close fails
                }
            }
        }
    }

    /**
     * @param optname property key
     * @return the value returned by the loaded configuration for that key
     */
    public static String getString(String optname) {
        return configuration.getString(optname);
    }

    /**
     * @param optname property key
     * @return the value returned by the loaded configuration for that key
     */
    public static int getInt(String optname) {
        return configuration.getInt(optname);
    }

    /**
     * @param optname      property key
     * @param defaultValue fallback handed to the configuration lookup
     * @return the value returned by the loaded configuration
     */
    public static int getInt(String optname, int defaultValue) {
        return configuration.getInt(optname, defaultValue);
    }
}
/**
 * Holder for the outcome of a captcha ("check code") download: the image
 * bytes, the cookie that came with them and a status flag.
 */
public class CheckCode {
    /** Captcha image bytes. */
    byte[] imgs;

    /** Cookie that belongs to the captcha. */
    String cookie;

    /** Status flag: "0" success, "1" no captcha (initial value), "2" download failed. */
    String status = "1";

    /** @return the captcha image bytes (the stored array, not a copy) */
    public byte[] getImgs() {
        return this.imgs;
    }

    /** @param imgs captcha image bytes to store */
    public void setImgs(byte[] imgs) {
        this.imgs = imgs;
    }

    /** @return the cookie that belongs to the captcha */
    public String getCookie() {
        return this.cookie;
    }

    /** @param cookie cookie that belongs to the captcha */
    public void setCookie(String cookie) {
        this.cookie = cookie;
    }

    /** @return the status flag */
    public String getStatus() {
        return this.status;
    }

    /** @param status new status flag */
    public void setStatus(String status) {
        this.status = status;
    }
}
/**
 * HTTP helper that fetches pages by POST or GET and downloads captcha
 * images, using connections obtained from {@code getConnection}. Cookies
 * received from a host are kept in {@link #cookies} and sent back on later
 * requests to the same host.
 */
public class Downloader extends Connector {
    private static final Logger logger = Logger.getLogger(Downloader.class);
    /** Cookie header value per host; a plain, unsynchronized map shared by all instances. */
    public static Map<String, String> cookies = new HashMap<String, String>();
    /** Number of leading body bytes decoded for content-based charset detection. */
    private static final int SNIFF_BYTES = 1024 * 4;
    /** Reading stops once this many body bytes have been received. */
    private int maxSize = 1024 * 1024 * 2;
    /** Maximum number of bytes requested per read call. */
    private int bufferSize = 1024 * 4;

    public Downloader() {
    }

    /**
     * Sends {@code para} as a form-encoded POST body and returns the decoded
     * response.
     *
     * @param url    target URL
     * @param para   request body; written with
     *               {@code DataOutputStream.writeBytes}, which keeps only the
     *               low eight bits of each character, so it must already be
     *               URL-encoded
     * @param refer  referer URL, or null/empty to send none
     * @param cookie Cookie header to send, or null to use the cached cookie
     *               of the host
     * @return the response text; the empty string when anything fails (the
     *         failure is logged)
     */
    public String download(String url, String para, String refer, String cookie) {
        String content = "";
        HttpURLConnection conn = null;
        try {
            URL u = new URL(url);
            conn = (HttpURLConnection) getConnection(u);
            conn.setDoOutput(true);
            conn.setDoInput(true);
            conn.setRequestMethod("POST");
            conn.setUseCaches(false);
            conn.setInstanceFollowRedirects(true);
            conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
            applyReferer(conn, refer);
            String sentCookie = applyCookie(conn, u.getHost(), cookie);
            conn.connect();
            DataOutputStream out = new DataOutputStream(conn.getOutputStream());
            try {
                out.writeBytes(para);
                out.flush();
            } finally {
                out.close();
            }
            content = readResponse(url, u.getHost(), conn, sentCookie == null);
        } catch (Exception e) {
            logger.error("下载url:" + url + "出错!", e);
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
        return content;
    }

    /**
     * Requests {@code url?para} with GET and returns the decoded response.
     *
     * @param url    target URL without query string
     * @param para   query string; when null or empty the URL is used as is
     * @param refer  referer URL, or null/empty to send none
     * @param cookie Cookie header to send, or null to use the cached cookie
     *               of the host
     * @return the response text; {@code null} when anything fails (the
     *         failure is logged)
     * @throws IOException declared for callers; failures are caught and
     *         logged inside this method
     */
    public String download_get(String url, String para, String refer, String cookie) throws IOException {
        String content = null;
        HttpURLConnection conn = null;
        try {
            // Avoid building "url?null" when no query string is supplied.
            String getURL = (para == null || para.length() == 0) ? url : url + "?" + para;
            URL u = new URL(getURL);
            conn = (HttpURLConnection) getConnection(u);
            conn.setDoInput(true);
            conn.setDoOutput(true);
            applyReferer(conn, refer);
            String sentCookie = applyCookie(conn, u.getHost(), cookie);
            conn.connect();
            content = readResponse(url, u.getHost(), conn, sentCookie == null);
        } catch (Exception e) {
            logger.error("下载url:" + url + "出错!", e);
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
        return content;
    }

    /**
     * Downloads a captcha image together with the cookies set by the
     * response.
     *
     * @param urlStr image URL
     * @param refer  referer URL, or null/empty to send none
     * @return a {@link CheckCode} with status "0" and the image bytes on
     *         success, or status "2" when the download fails
     */
    public CheckCode downloadImg(String urlStr, String refer) {
        CheckCode checkcode = new CheckCode();
        InputStream is = null;
        try {
            URL url = new URL(urlStr);
            URLConnection conn = getConnection(url);
            conn.setDoOutput(true);
            conn.setConnectTimeout(20000);
            conn.setReadTimeout(10000);
            applyReferer(conn, refer);
            conn.connect();
            String cookieVal = collectCookies(conn);
            if (!"".equals(cookieVal)) {
                cookies.put(url.getHost(), cookieVal);
                checkcode.setCookie(cookieVal);
            }
            is = openBody(conn);
            ByteArrayOutputStream os = new ByteArrayOutputStream();
            byte[] buff = new byte[bufferSize];
            int res;
            while ((res = is.read(buff)) != -1) {
                os.write(buff, 0, res);
            }
            checkcode.setImgs(os.toByteArray());
            checkcode.setStatus("0");
        } catch (Exception e) {
            checkcode.setStatus("2");
            logger.error("下载图片:" + urlStr + "出错!", e);
        } finally {
            close(is);
        }
        return checkcode;
    }

    /**
     * Sets the Referer header, and the Host header derived from it, when a
     * referer is given. An empty referer is skipped because it cannot be
     * parsed as a URL.
     */
    private static void applyReferer(URLConnection conn, String refer) throws IOException {
        if (refer != null && refer.length() > 0) {
            conn.setRequestProperty("Referer", refer);
            conn.setRequestProperty("Host", new URL(refer).getHost());
        }
    }

    /**
     * Sends the explicit cookie, or the cached cookie of the host when none
     * is given. Returns the value that was sent, or null when there was none.
     */
    private static String applyCookie(URLConnection conn, String host, String cookie) {
        String effective = (cookie != null) ? cookie : cookies.get(host);
        if (effective != null) {
            conn.setRequestProperty("Cookie", effective);
        }
        return effective;
    }

    /** Opens the response body, unwrapping it when Content-Encoding is gzip. */
    private static InputStream openBody(URLConnection conn) throws IOException {
        String contentencoding = conn.getHeaderField("Content-Encoding");
        if ("gzip".equals(contentencoding)) {
            return new GZIPInputStream(conn.getInputStream());
        }
        return conn.getInputStream();
    }

    /** Joins the name=value part of every Set-Cookie response header with ';'. */
    private static String collectCookies(URLConnection conn) {
        StringBuilder cookieVal = new StringBuilder();
        String key;
        for (int i = 1; (key = conn.getHeaderFieldKey(i)) != null; i++) {
            if (!key.equalsIgnoreCase("set-cookie")) {
                continue;
            }
            String value = conn.getHeaderField(i);
            String[] parts = (value == null) ? new String[0] : value.split(";");
            if (parts.length == 0) {
                continue; // header without a usable name=value part
            }
            if (cookieVal.length() > 0) {
                cookieVal.append(";");
            }
            cookieVal.append(parts[0]);
        }
        return cookieVal.toString();
    }

    /**
     * Reads at most about {@code maxSize} bytes of the body, optionally
     * caches the response cookies for the host, and decodes the bytes with
     * the charset reported by {@code Chardet} (gb2312 when none is detected).
     * Declared with {@code throws Exception} because the checked exceptions
     * of the {@code Chardet} helpers are not visible here.
     */
    private String readResponse(String url, String host, URLConnection conn,
            boolean storeCookies) throws Exception {
        InputStream is = null;
        try {
            is = openBody(conn);
            // One spare read of bufferSize always fits after the size check.
            byte[] buff = new byte[maxSize + bufferSize];
            int readed = 0;
            int res;
            while ((res = is.read(buff, readed, bufferSize)) != -1) {
                readed += res;
                if (readed >= maxSize) {
                    break;
                }
            }
            if (storeCookies) {
                String cookieVal = collectCookies(conn);
                if (!"".equals(cookieVal)) {
                    cookies.put(host, cookieVal);
                }
            }
            String contenttype = conn.getHeaderField("Content-Type");
            String encoding = Chardet.getCharSetByHead(url, contenttype);
            if (encoding == null) {
                // Sniff only bytes that were actually received.
                String tmp = new String(buff, 0, Math.min(readed, SNIFF_BYTES), "gb2312");
                encoding = Chardet.getCharSetByContent(url, tmp);
            }
            if (encoding == null) {
                String content = new String(buff, 0, readed, "gb2312");
                logger.info("未识别网页编码:" + url);
                return content;
            }
            return new String(buff, 0, readed, encoding);
        } finally {
            close(is);
        }
    }

    /** Closes the stream if it was opened, ignoring close failures. */
    private void close(InputStream is) {
        if (is == null) {
            return;
        }
        try {
            is.close();
        } catch (IOException ignored) {
            // nothing useful can be done when close fails
        }
    }

    /** Manual smoke test: loads the configuration and downloads one image. */
    public static void main(String[] args) throws InterruptedException, IOException {
        Config.init();
        Downloader imgdl = new Downloader();
        imgdl.downloadImg("http://y0.ifengimg.com/a/2015_18/e8c351adc97fbb7.bmp", "");
    }
}
/**
 * Client for the captcha recognition service whose address is configured
 * under the key {@code checkcode_url}.
 */
public class CheckCodeUtil {
    private static final Logger logger = Logger.getLogger(CheckCodeUtil.class);

    /**
     * Manual smoke test: asks the service to solve one captcha and prints the
     * {@code checkcode} field of the answer.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            JSONObject job = getCheckCode("http://www.stc.gov.cn:8082/szwsjj_web/ImgServlet.action?rnd=0.12", null, "01");
            if (job != null) {
                System.out.println(job.get("checkcode"));
            }
        } catch (Exception e) {
            logger.error("获取验证码失败!", e);
        }
    }

    /**
     * Posts the captcha image URL to the recognition service and parses the
     * JSON answer.
     *
     * @param imgurl  URL of the captcha image; sent as parameter {@code url}
     * @param url     referer for the image; sent as parameter {@code refer},
     *                omitted when null
     * @param imgtype captcha type; sent unencoded as parameter {@code type}
     * @return the parsed answer, or {@code null} when the request fails or
     *         the response body is empty
     */
    public static JSONObject getCheckCode(String imgurl, String url, String imgtype) {
        JSONObject job = null;
        int tryCount = 1;
        int code = -1;
        while (tryCount > 0 && (job == null || code != 0)) {
            HttpURLConnection con = null;
            try {
                URL u = new URL(Config.getString("checkcode_url"));
                con = (HttpURLConnection) u.openConnection();
                con.setRequestMethod("POST");
                con.setDoOutput(true);
                con.setDoInput(true);
                con.setUseCaches(false);
                con.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");

                // No '+' after "url=": in a form body it would decode to a
                // leading space in the value.
                StringBuilder para = new StringBuilder();
                para.append("url=").append(URLEncoder.encode(imgurl, Constants.CHART_SET));
                para.append("&type=").append(imgtype);
                if (url != null) {
                    // URLEncoder.encode rejects null, so refer is optional
                    para.append("&refer=").append(URLEncoder.encode(url, Constants.CHART_SET));
                }

                OutputStreamWriter osw = new OutputStreamWriter(con.getOutputStream(), Constants.CHART_SET);
                try {
                    osw.write(para.toString());
                    osw.flush();
                } finally {
                    osw.close();
                }

                // Read the response body; line terminators are dropped.
                StringBuilder buffer = new StringBuilder();
                BufferedReader br = new BufferedReader(new InputStreamReader(con.getInputStream(), Constants.CHART_SET));
                try {
                    String temp;
                    while ((temp = br.readLine()) != null) {
                        buffer.append(temp);
                    }
                } finally {
                    br.close();
                }

                String body = buffer.toString();
                if (!"".equals(body)) {
                    job = new JSONObject(body);
                    code = job.getInt("code");
                }
            } catch (Exception e) {
                logger.error("获取验证码失败!", e);
            } finally {
                tryCount--;
                if (con != null) {
                    con.disconnect();
                }
            }
        }
        return job;
    }
}