自动识别图片验证码登录
目标:从需要会员登录的网站抓取数据。
场景:A网站需要会员登录才能查阅信息,A网站采用了AntiForgery防止XSRF攻击。
创建windows应用,采用webBrowser模拟加载页面,识别验证码然后登录。
1.使用webBrowser导航到登录页
// Step 1: load the login page inside the embedded browser control.
const string loginUrl = "https://xxxx/Login";
webBrowser1.Navigate(loginUrl);
2.获取页面验证码
/// <summary>
/// Captures the bitmap currently rendered for an image element of the page
/// by copying it through the system clipboard.
/// </summary>
/// <param name="wb">Browser control hosting the page.</param>
/// <param name="img">The image element to capture (e.g. the captcha image).</param>
/// <returns>
/// The copied image, or <c>null</c> when no document is loaded, the element
/// cannot be placed in a control range, or the copy produced no image.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="wb"/> or <paramref name="img"/> is null.
/// </exception>
/// <remarks>
/// This overwrites whatever the user had on the clipboard.
/// </remarks>
private Image GetCodeImage(WebBrowser wb, HtmlElement img)
{
    if (wb == null)
    {
        throw new ArgumentNullException(nameof(wb));
    }
    if (img == null)
    {
        throw new ArgumentNullException(nameof(img));
    }

    // Document is null until a page has been loaded into the control.
    if (wb.Document == null)
    {
        return null;
    }

    var doc = (HTMLDocument)wb.Document.DomDocument;
    var body = (HTMLBody)doc.body;
    var hImg = img.DomElement as IHTMLControlElement;
    if (body == null || hImg == null)
    {
        return null;
    }

    var cr = (IHTMLControlRange)body.createControlRange();
    cr.add(hImg);

    // Clear first so that a failed copy yields null instead of silently
    // returning a stale image left on the clipboard by something else.
    Clipboard.Clear();
    cr.execCommand("Copy", false, null);
    return Clipboard.GetImage();
}
3.识别验证码(利用百度OCR技术)
/// <summary>
/// Sends the captcha shown in <c>pictureBox1</c> to the Baidu OCR service,
/// writes the recognized code into <c>txtCode</c>, and, when no valid code
/// was recognized, clicks the captcha element to refresh it and captures
/// the new image for the next attempt.
/// </summary>
private void button4_Click(object sender, EventArgs e)
{
    var API_KEY = "***"; // replace with your own key
    var SECRET_KEY = "****"; // replace with your own key
    var client = new Baidu.Aip.Ocr.Ocr(API_KEY, SECRET_KEY);
    client.Timeout = 60000;

    var resultString = "";
    var captcha = this.pictureBox1.Image;
    if (captcha != null)
    {
        byte[] data;
        using (var ms = new MemoryStream())
        {
            // Re-encode as BMP: the OCR service cannot recognize GIF input.
            captcha.Save(ms, ImageFormat.Bmp);
            data = ms.ToArray();
        }

        // General text recognition (high-accuracy edition). The call may
        // throw on network errors; a failure is treated the same as
        // "nothing recognized" so the captcha gets refreshed below.
        try
        {
            var result = client.AccurateBasic(data);
            // words_result may be missing or empty when nothing was recognized.
            resultString = result?["words_result"]?.First?["words"]?.ToString() ?? "";
        }
        catch (Exception ex)
        {
            System.Diagnostics.Debug.WriteLine("Captcha OCR request failed: " + ex);
        }
    }

    var code = TryGetCode(resultString);
    this.txtCode.Text = code;

    // The captcha has 4 characters; anything shorter means recognition
    // failed, so refresh the captcha and grab the new image.
    if (code.Length < 4)
    {
        HtmlElement captchaElement = webBrowser1.Document?.GetElementById("CaptchaImage");
        if (captchaElement != null)
        {
            captchaElement.InvokeMember("click");

            // Look the element up again after the click in case the page replaced it.
            // NOTE(review): the refreshed image may not have finished loading
            // at this point - confirm whether a delay/timer is needed.
            HtmlElement img = webBrowser1.Document?.GetElementById("CaptchaImage");
            if (img != null)
            {
                pictureBox1.Image = GetCodeImage(webBrowser1, img);
            }
        }
    }
}
/// <summary>
/// Validates OCR output as a captcha code.
/// </summary>
/// <param name="result">Raw text returned by the OCR step; may be null.</param>
/// <returns>
/// <paramref name="result"/> unchanged when it consists of exactly four
/// ASCII digits; otherwise an empty string.
/// </returns>
private string TryGetCode(string result)
{
    const int CodeLength = 4;

    if (result == null || result.Length != CodeLength)
    {
        return "";
    }

    foreach (char ch in result)
    {
        // Any non-digit character invalidates the whole candidate.
        if (ch < '0' || ch > '9')
        {
            return "";
        }
    }

    return result;
}
4.填写登录信息并提交登录
/// <summary>
/// Fills the login form in the loaded page with the user name and password
/// from the form's text boxes, captures the current captcha image into
/// <c>pictureBox1</c>, and clicks the submit button.
/// Does nothing when no page is loaded.
/// </summary>
private void Login()
{
    // Document is null until a page has been loaded into the control.
    var doc = webBrowser1.Document;
    if (doc == null)
    {
        return;
    }

    HtmlElement name = doc.GetElementById("LoginID");
    if (name != null)
    {
        name.SetAttribute("value", this.txtUserName.Text.Trim());
    }

    HtmlElement pass = doc.GetElementById("Password");
    if (pass != null)
    {
        pass.SetAttribute("value", this.txtPassword.Text.Trim());
    }

    HtmlElement img = doc.GetElementById("CaptchaImage");
    if (img != null)
    {
        pictureBox1.Image = GetCodeImage(webBrowser1, img);
    }

    HtmlElement btnAgree = doc.GetElementById("btn_OK");
    if (btnAgree != null)
    {
        btnAgree.InvokeMember("click");
        // NOTE(review): this caption is set as soon as the click is issued;
        // the server's response has not been checked at this point.
        this.Text = "登录成功!";
    }
}
- 通过主页a标签 加载iframe页面
// When the browser is on the home page, click the first <a> element whose
// data-id attribute is "1", start the timer, and leave the enclosing handler.
// Url is null before the first navigation, hence the null-conditional access.
if (webBrowser1.Url?.AbsoluteUri == "https://h.kfun222.com/")
{
    try
    {
        HtmlElementCollection list = webBrowser1.Document?.GetElementsByTagName("a");
        if (list != null)
        {
            foreach (HtmlElement a in list)
            {
                if (a.GetAttribute("data-id") == "1")
                {
                    a.InvokeMember("click");
                    timer.Start();
                    return;
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: a failure here must not break the handler, but it
        // should at least be visible while debugging.
        System.Diagnostics.Debug.WriteLine("Failed to click home page link: " + ex);
    }
}
小结:可以使用timer在验证码识别失败后自动刷新验证码并重新识别,直到识别成功后再登录系统。