本文将介绍如何使用 C# 语言实现文字识别验证码的自动化处理。具体步骤包括:提取目标文字和背景图文字,计算点击坐标,并模拟点击。
一、目标文字识别
首先,我们需要获取目标文字图片的 URL,并将图片下载到本地。
csharp
using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Net;
using OpenQA.Selenium;
using OpenQA.Selenium.Chrome;
using Tesseract;
/// <summary>
/// Step one of the captcha walkthrough: opens the captcha page, downloads the
/// image that shows the target words and runs OCR on it.
/// </summary>
class Program
{
    /// <summary>
    /// Opens the captcha page, saves the target-word image as
    /// <c>target_image.png</c> and prints the text recognised in it.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        // The browser session is left open on purpose: the later steps of the
        // walkthrough keep using this driver and end it with driver.Quit().
        var driver = new ChromeDriver();
        driver.Navigate().GoToUrl("验证码页面URL");

        // Read the target-word image URL from the element's src attribute and download it.
        var targetImageElement = driver.FindElement(By.XPath("//*[@id='verify-bar-code']"));
        var targetImgURL = targetImageElement.GetAttribute("src");
        var targetImageContent = DownloadImage(targetImgURL);
        File.WriteAllBytes("target_image.png", targetImageContent);

        // Recognise the target words with Tesseract OCR.
        var targetWords = RecognizeText("target_image.png");
        Console.WriteLine("Target words: " + targetWords);
    }

    /// <summary>Downloads the resource at <paramref name="url"/> into memory.</summary>
    /// <param name="url">Address of the image to fetch.</param>
    /// <returns>The raw bytes returned by the server.</returns>
    static byte[] DownloadImage(string url)
    {
        using (var client = new WebClient())
        {
            return client.DownloadData(url);
        }
    }

    /// <summary>
    /// Runs Tesseract OCR (language data <c>chi_sim</c>, loaded from
    /// <c>./tessdata</c>) over an image file.
    /// </summary>
    /// <param name="imagePath">Path of the image file to recognise.</param>
    /// <returns>The text Tesseract recognised in the image.</returns>
    static string RecognizeText(string imagePath)
    {
        // The engine, the image and the page all wrap native resources, so each
        // one is disposed as soon as the text has been read.
        using (var ocr = new TesseractEngine(@"./tessdata", "chi_sim", EngineMode.Default))
        using (var img = Pix.LoadFromFile(imagePath))
        using (var page = ocr.Process(img))
        {
            return page.GetText();
        }
    }
}
二、背景图文字识别
同样地,先获取背景图片的 URL 并下载图片。
csharp
// Locate the captcha background image, fetch the file its src attribute
// points to, and save it locally for OCR.
var backgroundImageElement = driver.FindElement(By.XPath("//*[@id='captcha-verify-image']"));
File.WriteAllBytes(
    "background_image.png",
    DownloadImage(backgroundImageElement.GetAttribute("src")));
获取图片后,使用 Tesseract 库识别背景图中的文字及其位置。
csharp
/// <summary>
/// Runs Tesseract OCR (language data <c>chi_sim</c>, loaded from
/// <c>./tessdata</c>) over an image and returns every recognised word with
/// its bounding box.
/// </summary>
/// <param name="imagePath">Path of the image file to recognise.</param>
/// <returns>
/// A map from trimmed word text to its bounding box in image pixels. When the
/// same text is recognised more than once, the last occurrence wins.
/// </returns>
static Dictionary<string, Rectangle> RecognizeTextWithPositions(string imagePath)
{
    var wordsAndPositions = new Dictionary<string, Rectangle>();

    // The engine, the image, the page and the iterator all wrap native
    // resources, so they are disposed together when the walk is finished.
    using (var ocr = new TesseractEngine(@"./tessdata", "chi_sim", EngineMode.Default))
    using (var img = Pix.LoadFromFile(imagePath))
    using (var page = ocr.Process(img))
    using (var iterator = page.GetIterator())
    {
        // Walk the single recognised page word by word. Re-running OCR on each
        // region is not an option: the engine allows only one active page.
        iterator.Begin();
        do
        {
            Rect bounds;
            if (!iterator.TryGetBoundingBox(PageIteratorLevel.Word, out bounds))
            {
                continue;
            }

            var text = iterator.GetText(PageIteratorLevel.Word);
            if (string.IsNullOrWhiteSpace(text))
            {
                // Nothing usable was recognised at this position.
                continue;
            }

            wordsAndPositions[text.Trim()] =
                new Rectangle(bounds.X1, bounds.Y1, bounds.Width, bounds.Height);
        }
        while (iterator.Next(PageIteratorLevel.Word));
    }

    return wordsAndPositions;
}
// Recognise the words in the saved background image together with their bounding boxes.
Dictionary<string, Rectangle> wordsAndPositions = RecognizeTextWithPositions("background_image.png");
三、计算点击坐标并点击
文字识别完成后,计算各目标文字的点击坐标并模拟点击。
csharp
// Work out where each target word sits on the page and click the words in
// order. The browser is closed afterwards even if a step below throws.
try
{
    // Centre of every recognised word's bounding box, in pixels of the downloaded image.
    var imgXY = new Dictionary<string, Point>();
    foreach (var kvp in wordsAndPositions)
    {
        var rect = kvp.Value;
        imgXY[kvp.Key] = new Point(rect.X + rect.Width / 2, rect.Y + rect.Height / 2);
    }

    // Click targets in the order the target words were recognised. A list is
    // used so that the order is guaranteed and a repeated word is clicked again.
    var result = new List<Point>();
    foreach (var line in targetWords.Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries))
    {
        // The background keys are trimmed, so trim here too or nothing matches.
        var word = line.Trim();
        Point center;
        if (word.Length > 0 && imgXY.TryGetValue(word, out center))
        {
            result.Add(center);
        }
    }

    // Scale from downloaded-image pixels to on-page pixels.
    // NOTE(review): presumably 552x344 is the image's natural size and 340x212
    // its rendered size; confirm against the real page.
    const double ScaleX = 340.0 / 552.0;
    const double ScaleY = 212.0 / 344.0;

    var image1Location = backgroundImageElement.Location;

    // MoveByOffset is relative to the pointer's last position, so track where
    // the pointer is and move by the difference on every click.
    // Assumes the pointer still sits at the origin (0, 0) before the first move.
    var pointer = Point.Empty;
    foreach (var xy in result)
    {
        var targetX = image1Location.X + (int)(xy.X * ScaleX);
        var targetY = image1Location.Y + (int)(xy.Y * ScaleY);

        var action = new OpenQA.Selenium.Interactions.Actions(driver);
        action.MoveByOffset(targetX - pointer.X, targetY - pointer.Y).Click().Perform();

        pointer = new Point(targetX, targetY);
    }
}
finally
{
    driver.Quit();
}