1、使用的是tess4j识别验证码;
2、使用jsoup模拟浏览器登录请求。
package com.test.tess;
import com.alibaba.fastjson.JSONObject;
import com.fasterxml.jackson.core.SerializableString;
import com.jst.tess.constants.Constants;
import com.jst.tess.util.FileUtils;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import org.apache.struts2.ServletActionContext;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.web.context.request.RequestContextHolder;
import org.springframework.web.context.request.ServletRequestAttributes;
import sun.net.www.http.HttpClient;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.servlet.http.HttpSession;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Logs in to a captcha-protected web application and fetches pages from it.
 *
 * <p>The captcha image is downloaded with jsoup, written to disk, and recognised with
 * Tess4J. The resulting code is posted together with the credentials, and the
 * {@code JSESSIONID} cookie obtained from the login page is reused for later requests.
 *
 * <p>The static helpers ({@link #login()}, {@link #getList()}, {@link #getOne(String)})
 * share one cached session id in a static field; no synchronisation is performed.
 */
public class test4 extends HttpServlet {

    /** Login page; fetched first to obtain a fresh session cookie. */
    private static final String BASE_URL = "http://192.168.0.20:8080/test/login.jsp";
    /** Directory in which the captcha image and its denoised copy are stored. */
    private static final String VER_CODE_PATH = "D:\\img\\codeimg";
    /** Captcha image endpoint. */
    private static final String CODE_IMG_URL = "http://192.168.0.20:8080/test/login/getCode.do";
    /** Login form submission endpoint. */
    private static final String LOGIN_URL = "http://192.168.0.20:8080/test/login/login.do";
    /** Logout endpoint. */
    private static final String LOGOUT_URL = "http://192.168.0.20:8080/test/login/logout.do";
    /** Test data list endpoint. */
    private static final String LIST_URL = "http://192.168.0.20:8080/test/testList/getList.do";
    /** Test data detail endpoint. */
    private static final String GET_ONE_URL = "http://192.168.0.20:8080/test/testView/view.do";

    // Endpoints used by login(String, String, String, String).
    // NOTE(review): these point at a fixed host regardless of the `url` argument passed
    // to that method -- confirm whether they should be derived from it instead.
    private static final String ALT_CODE_IMG_URL = "http://192.168.0.37:8080/test/login/getCode.do";
    private static final String ALT_LOGIN_URL = "http://192.168.0.37:8080/test/login/login.do";
    private static final String ALT_LIST_URL = "http://192.168.0.37:8080/test/testList/getList.do";

    // NOTE(review): credentials are hard-coded in source; consider moving them to
    // external configuration.
    /** User name posted as {@code username}. */
    private static final String USER_NAME = "test";
    /** Value posted as {@code loginkey}. */
    private static final String PASS_WORD = "96af831e99ef1788b04c84d0a7782e855d700d4d6e7938722cfbcbaa";

    /** Element id whose presence in the login response marks the index (home) page. */
    private static final String INDEX_PAGE_MARKER_ID = "index-menu";
    /** Element id whose presence in a response marks the login form (session not valid). */
    private static final String LOGIN_FORM_MARKER_ID = "verifycode";
    /** Name of the session cookie. */
    private static final String SESSION_COOKIE = "JSESSIONID";
    /** Request timeout used by {@link #getOne(String)}, in milliseconds. */
    private static final int REQUEST_TIMEOUT_MS = 10000;
    /** Sentinel telling {@link #fetchWithSession} to keep jsoup's default timeout. */
    private static final int DEFAULT_TIMEOUT = -1;
    /** Upper bound on automatic re-logins per fetch; captcha OCR can fail repeatedly. */
    private static final int MAX_LOGIN_ATTEMPTS = 3;
    /** Precompiled pattern matching any run of whitespace. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Session id cached by the last successful {@link #login()}; {@code null} after a failed one. */
    private static String baseSessions = "";

    /**
     * Manual entry point. Uncomment one of the calls below to exercise the flow.
     */
    public static void main(String[] args) throws IOException, TesseractException {
        // login();
        // getList();
        // getOne("9");
    }

    /**
     * Logs in against the given login page and then requests the first page of the list.
     *
     * @param url        login page address; fetched with GET to obtain the session cookie
     * @param user       user name
     * @param pwd        password
     * @param tess4jpath Tess4J directory, e.g. {@code G:\test\Tess4J-3.4.8-src\Tess4J}; the
     *                   captcha images are written to its {@code codeimg} sub-directory, and it
     *                   is used as the Tesseract data path when no {@code tessdata} resource is
     *                   found on the classpath
     * @return the cookies returned by the login page, or {@code null} if any step failed
     */
    @SuppressWarnings("rawtypes")
    public Map login(String url, String user, String pwd, String tess4jpath) {
        System.out.println("begin:");
        Map<String, String> cookies = null;
        try {
            Connection.Response loginPage =
                    Jsoup.connect(url).method(Connection.Method.GET).execute();
            // The session must be kept for every request that follows.
            cookies = loginPage.cookies();
            String sessions = cookies.get(SESSION_COOKIE);
            System.out.println("sessions=" + sessions);

            String imageDir = new File(tess4jpath, "codeimg").getPath();
            String code = recognizeCaptcha(ALT_CODE_IMG_URL, sessions, imageDir, tess4jpath);

            Map<String, String> credentials = new HashMap<>();
            credentials.put("username", user);
            credentials.put("loginkey", pwd);
            credentials.put("verifycode", code);
            Jsoup.connect(ALT_LOGIN_URL)
                    .header("Cookie", sessionCookie(sessions))
                    .data(credentials)
                    .post();

            // Paging/sorting parameters belong to the list request itself.
            Map<String, String> listQuery = new HashMap<>();
            listQuery.put("page", "1");
            listQuery.put("rows", "10");
            listQuery.put("sort", "checkDate");
            listQuery.put("order", "desc");
            Document listPage = Jsoup.connect(ALT_LIST_URL)
                    .header("Cookie", sessionCookie(sessions))
                    .data(listQuery)
                    .post();
            String retString = listPage.body().text();
            System.out.println(retString);
        } catch (IOException | TesseractException | RuntimeException e) {
            // Report failure as null instead of returning from a finally block, which
            // would hide the exception and could hand back a stale cookie map.
            cookies = null;
            e.printStackTrace();
        }
        System.out.println("map:" + cookies);
        return cookies;
    }

    /**
     * Fetches the list page with the cached session, logging in again when necessary.
     *
     * @return the text of the response body; the exception's string form if an
     *         {@link IOException} occurred; or an empty string if no valid session could be
     *         obtained within {@value #MAX_LOGIN_ATTEMPTS} login attempts
     */
    public static String getList() {
        String retString = "";
        try {
            Document page = fetchWithSession(LIST_URL, new HashMap<String, String>(), DEFAULT_TIMEOUT);
            if (page != null) {
                System.out.println(page.body());
                retString = page.body().text();
                System.out.println(retString);
            }
        } catch (IOException e) {
            System.out.println("进入异常!");
            retString = e.toString();
        }
        return retString;
    }

    /**
     * Fetches the detail page of one record with the cached session, logging in again when
     * necessary. The text of every {@code td} element is printed to standard output.
     *
     * @param id record id, posted as the {@code id} parameter
     * @return the text of the response body; the exception's string form if an
     *         {@link IOException} occurred; or an empty string if no valid session could be
     *         obtained within {@value #MAX_LOGIN_ATTEMPTS} login attempts
     */
    public static String getOne(String id) {
        String retString = "";
        try {
            Map<String, String> params = new HashMap<>();
            params.put("id", id);
            Document page = fetchWithSession(GET_ONE_URL, params, REQUEST_TIMEOUT_MS);
            if (page != null) {
                System.out.println(page);
                Elements cells = page.select("td");
                for (Element cell : cells) {
                    System.out.println(cell.text());
                }
                retString = page.body().text();
                System.out.println(retString);
            }
        } catch (IOException e) {
            System.out.println("进入异常!" + e.toString());
            retString = e.toString();
        }
        return retString;
    }

    /**
     * Logs in with the configured credentials and caches the session id on success.
     *
     * <p>Success is detected by the presence of the element with id
     * {@value #INDEX_PAGE_MARKER_ID} in the login response. On failure the cached session
     * is set to {@code null}.
     *
     * @return a JSON object with {@code code} = {@code "200"} and {@code sessionId} = the
     *         session id on success; otherwise {@code code} = {@code "999"} and
     *         {@code sessionId} holding a failure description
     */
    public static JSONObject login() {
        JSONObject jsonObject = new JSONObject();
        try {
            Connection.Response loginPage =
                    Jsoup.connect(BASE_URL).method(Connection.Method.GET).execute();
            String sessions = loginPage.cookies().get(SESSION_COOKIE);
            System.out.println("sessions=" + sessions);

            String code = recognizeCaptcha(CODE_IMG_URL, sessions, VER_CODE_PATH, null);

            Map<String, String> credentials = new HashMap<>();
            credentials.put("username", USER_NAME);
            credentials.put("loginkey", PASS_WORD);
            credentials.put("verifycode", code);
            Document document = Jsoup.connect(LOGIN_URL)
                    .header("Cookie", sessionCookie(sessions))
                    .data(credentials)
                    .post();
            System.out.println("response:" + document);

            // getElementById returns null when the marker is absent, so test the element
            // itself rather than dereferencing it.
            Element indexMarker = document.getElementById(INDEX_PAGE_MARKER_ID);
            if (indexMarker != null) {
                System.out.println("进入了首页!");
                baseSessions = sessions;
                jsonObject.put("code", "200");
                jsonObject.put("sessionId", sessions);
            } else {
                jsonObject.put("code", "999");
                jsonObject.put("sessionId", "未成功进入首页" + document);
                System.out.println("未成功进入首页!" + indexMarker);
                baseSessions = null;
            }
        } catch (Exception e) {
            jsonObject.put("code", "999");
            jsonObject.put("sessionId", e.toString());
            e.printStackTrace();
        }
        return jsonObject;
    }

    /**
     * Posts to the logout endpoint with the given session id.
     *
     * @param sessionId value sent as the {@code JSESSIONID} cookie
     */
    public static void logout(String sessionId) {
        try {
            Jsoup.connect(LOGOUT_URL).cookie(SESSION_COOKIE, sessionId).post();
            System.out.println("注销成功!");
        } catch (IOException e) {
            System.out.println("进入异常!" + e);
        }
    }

    /**
     * Removes every whitespace character (spaces, tabs, line breaks) from a string.
     *
     * @param str input, may be {@code null}
     * @return the input without whitespace, or an empty string for {@code null}
     */
    public static String replaceBlank(String str) {
        if (str == null) {
            return "";
        }
        return WHITESPACE.matcher(str).replaceAll("");
    }

    /** Builds the {@code Cookie} header value carrying the given session id. */
    private static String sessionCookie(String sessionId) {
        return SESSION_COOKIE + "=" + sessionId;
    }

    /**
     * Requests {@code url} with the cached session, re-running {@link #login()} (at most
     * {@value #MAX_LOGIN_ATTEMPTS} times) while the response still contains the login form.
     *
     * @param timeoutMillis request timeout, or a negative value to keep jsoup's default
     * @return the parsed response, or {@code null} if no valid session could be obtained
     */
    private static Document fetchWithSession(String url, Map<String, String> params, int timeoutMillis)
            throws IOException {
        for (int loginsLeft = MAX_LOGIN_ATTEMPTS; ; loginsLeft--) {
            System.out.println("session:" + baseSessions);
            if (baseSessions != null) {
                Connection request = Jsoup.connect(url)
                        .header("Cookie", sessionCookie(baseSessions))
                        .data(params)
                        .ignoreContentType(true);
                if (timeoutMillis >= 0) {
                    request.timeout(timeoutMillis);
                }
                Document page = request.post();
                if (page.getElementById(LOGIN_FORM_MARKER_ID) == null && page.body() != null) {
                    System.out.println("不需要重新登录!");
                    return page;
                }
                System.out.println("需要重新登录!");
            }
            if (loginsLeft <= 0) {
                return null;
            }
            login();
        }
    }

    /**
     * Downloads the captcha for the given session, stores it as {@code codeimg.jpg} in
     * {@code imageDir} (with a denoised copy {@code codetmpimgtmp.jpg}), and runs OCR on it.
     *
     * @param fallbackDatapath Tesseract data path to use when no {@code tessdata} resource
     *                         is on the classpath; may be {@code null}
     * @return the recognised code with all whitespace removed
     */
    private static String recognizeCaptcha(
            String captchaUrl, String sessionId, String imageDir, String fallbackDatapath)
            throws IOException, TesseractException {
        byte[] imageBytes = Jsoup.connect(captchaUrl)
                .header("Cookie", sessionCookie(sessionId))
                .ignoreContentType(true)
                .execute()
                .bodyAsBytes();

        // Make sure the target directory exists before anything is written to it.
        FileUtils.creatDir(imageDir);
        FileUtils.saveImg(imageBytes, imageDir, "codeimg.jpg");
        String originalImg = new File(imageDir, "codeimg.jpg").getPath();
        String denoisedImg = new File(imageDir, "codetmpimgtmp.jpg").getPath();
        FileUtils.removeBackground(originalImg, denoisedImg);

        ITesseract instance = new Tesseract();
        // The default language is English; call instance.setLanguage("chi_sim") for Chinese.
        String datapath = resolveTessdataPath(fallbackDatapath);
        if (datapath != null) {
            instance.setDatapath(datapath);
        }

        // NOTE(review): OCR runs on the original image, not on the denoised copy written
        // just above -- confirm which of the two is intended.
        String code = replaceBlank(instance.doOCR(new File(originalImg)));
        System.out.println("codeLength:" + code.length() + ",code:" + code);
        return code;
    }

    /**
     * Locates the {@code tessdata} resource on the classpath.
     *
     * @return its file-system path, or {@code fallback} when the resource is not present
     */
    private static String resolveTessdataPath(String fallback) {
        URL resource = ClassLoader.getSystemResource("tessdata");
        if (resource == null) {
            return fallback;
        }
        if ("file".equals(resource.getProtocol())) {
            try {
                // Handles URL-encoded characters and works on every platform.
                return new File(resource.toURI()).getPath();
            } catch (java.net.URISyntaxException | IllegalArgumentException ignored) {
                // Fall through to the raw-path handling below.
            }
        }
        // Strips the leading '/' of paths such as "/D:/...".
        return resource.getPath().substring(1);
    }
}
使用的工具类:
package com.jst.tess.util;
import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
/**
 * File helpers used by the captcha recognition flow: directory creation, captcha image
 * clean-up, and writing raw image bytes to disk.
 *
 * <p>None of the methods propagate I/O failures; they print the stack trace instead.
 */
public class FileUtils {

    /** Pixels whose red + green + blue sum reaches this value are treated as background. */
    private static final int BACKGROUND_THRESHOLD = 400;

    /**
     * Creates the directory, including any missing parent directories, if it does not exist.
     *
     * @param path directory path
     */
    public static void creatDir(String path) {
        File file = new File(path);
        if (!file.exists()) {
            file.mkdirs();
        }
    }

    /**
     * Cleans up a captcha image and writes the result as a JPEG file.
     *
     * <p>The source image needs a height of at least 2 whenever its width exceeds 1,
     * because row 1 is sampled for reference colours. Any failure is printed and no
     * exception is propagated.
     *
     * @param imgUrl path of the source image
     * @param resUrl path of the output file; missing parent directories are created
     */
    public static void removeBackground(String imgUrl, String resUrl) {
        try {
            BufferedImage img = ImageIO.read(new File(imgUrl));
            if (img == null) {
                // ImageIO.read returns null when no registered reader can decode the file.
                throw new IOException("No image reader could decode " + imgUrl);
            }
            int width = img.getWidth();
            int height = img.getHeight();
            int white = Color.WHITE.getRGB();
            int black = Color.BLACK.getRGB();

            // Pass 1: turn bright pixels white. The operation is idempotent, so a single
            // sweep gives the same result as repeating it once per column.
            for (int x = 0; x < width; x++) {
                for (int y = 0; y < height; y++) {
                    if (channelSum(img.getRGB(x, y)) >= BACKGROUND_THRESHOLD) {
                        img.setRGB(x, y, white);
                    }
                }
            }

            // Pass 2: for every column i >= 1, take pixel (i, 1) as the reference and
            // repaint the whole image: pixels with the same channel sum become black, all
            // others white.
            // NOTE(review): after the first iteration the image is two-coloured, so later
            // iterations can only keep or invert it; kept as-is -- confirm the intent.
            for (int i = 1; i < width; i++) {
                int reference = channelSum(img.getRGB(i, 1));
                for (int x = 0; x < width; x++) {
                    for (int y = 0; y < height; y++) {
                        boolean matches = channelSum(img.getRGB(x, y)) == reference;
                        img.setRGB(x, y, matches ? black : white);
                    }
                }
            }

            File file = new File(resUrl);
            // getParentFile() is null for a bare file name; nothing to create then.
            File dir = file.getParentFile();
            if (dir != null && !dir.exists()) {
                dir.mkdirs();
            }
            if (!ImageIO.write(img, "jpg", file)) {
                throw new IOException("No JPEG writer accepted the image for " + resUrl);
            }
        } catch (IOException | RuntimeException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes the bytes to {@code filePath/filename}, creating the directory if needed.
     * Any failure is printed and no exception is propagated.
     *
     * @param imgdata  file content
     * @param filePath target directory
     * @param filename target file name
     */
    public static void saveImg(byte[] imgdata, String filePath, String filename) {
        File dir = new File(filePath);
        try {
            // A missing directory must be created before the file can be opened.
            if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
                throw new IOException("Cannot create directory " + dir);
            }
            File file = new File(filePath + File.separator + filename);
            try (BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(file))) {
                out.write(imgdata);
            }
        } catch (IOException | RuntimeException e) {
            e.printStackTrace();
        }
    }

    /** Returns red + green + blue of a packed RGB value, ignoring alpha. */
    private static int channelSum(int rgb) {
        return ((rgb >> 16) & 0xFF) + ((rgb >> 8) & 0xFF) + (rgb & 0xFF);
    }
}
部分代码参考自:Java识别验证码和图像处理_梁康h的博客-CSDN博客