在当今信息爆炸的时代,快速查找和处理数据成为了一项重要的技能。无论是从Excel中查找特定内容,还是从图片中提取文字,这些功能都能极大地提高我们的工作效率。本文将带你从零开始,使用C#开发一款多功能查询工具,集成了关键字查找、Excel题目内容导入以及图片文字识别功能。通过这个项目,你将学习到C#编程的核心技术,并掌握如何将这些技术应用到实际场景中。
项目概述
我们的多功能查询工具将具备以下核心功能:
- 关键字查找:从Excel文件中快速查找包含指定关键字的题目内容。
- Excel题目导入:支持导入Excel文件,并提取其中的题目内容。
- 图片文字识别(OCR):通过Tesseract OCR引擎,将图片中的文字提取并显示在文本框中。
通过这个项目,你将学习到以下技术要点:
- C# WinForms 开发
- Excel文件操作(使用Microsoft.Office.Interop.Excel)
- 图片文字识别(使用Tesseract OCR)
- 进度条与异步操作
- 事件驱动编程
技术细节与代码解析
1. 项目初始化与界面设计
首先,我们使用C# WinForms创建一个桌面应用程序。主界面包括以下控件:
- TextBox:用于显示文件路径、搜索关键字和OCR结果。
- Button:用于触发文件选择、搜索和OCR操作。
- PictureBox:用于显示图片。
- ProgressBar:用于显示搜索进度。
- RadioButton:用于选择OCR语言(中文、英文等)。
/// <summary>
/// Creates the main window: builds the controls, applies captions, prepares
/// speech recognition, maximizes the window and wires the mouse handlers.
/// </summary>
public Form1()
{
    // The designer-generated controls must exist before anything below touches them.
    InitializeComponent();
    InitMessage();

    // Set up the speech recognizer.
    ConfigSpeechRecognizer();

    WindowState = FormWindowState.Maximized;

    // Hook up the mouse event handlers.
    MouseSearch();
}
2. Excel文件导入与关键字查找
用户可以通过按钮选择Excel文件,并在文本框中输入关键字进行查找。程序会遍历Excel文件中的每个单元格,查找包含关键字的题目内容,并将结果显示在文本框中。
/// <summary>
/// Lets the user pick an Excel workbook and shows the chosen path in textBox1.
/// </summary>
/// <param name="sender">The button that raised the click.</param>
/// <param name="e">Unused event data.</param>
private void btnImportExcel_Click(object sender, EventArgs e)
{
    try
    {
        // OpenFileDialog is IDisposable; the using block releases it after the dialog closes.
        using (OpenFileDialog openFileDialog = new OpenFileDialog
        {
            Filter = "Excel Files|*.xls;*.xlsx",
            Title = "选择Excel文件"
        })
        {
            if (openFileDialog.ShowDialog() == DialogResult.OK)
            {
                // Show the selected file path.
                textBox1.Text = openFileDialog.FileName;
            }
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show($"打开Excel文件时发生错误: {ex.Message}", "错误", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}
在查找过程中,程序会使用进度条显示查找进度,并在找到匹配项后停止查找。
/// <summary>
/// Searches the first worksheet of the workbook named in textBox1 for the first
/// cell whose displayed text contains the search term, then shows the header row
/// paired with the matching row in txtResults.
/// </summary>
/// <param name="sender">The button that raised the click.</param>
/// <param name="e">Unused event data.</param>
private void btnSearch_Click(object sender, EventArgs e)
{
    // Validate all input BEFORE starting Excel, so an early return cannot
    // leave an orphaned Excel process behind.
    string filePath = textBox1.Text;
    if (string.IsNullOrEmpty(filePath.Trim()))
    {
        MessageBox.Show("请优先选择题库路径!");
        return;
    }

    string searchTerm = txtSearchTerm.Text.Trim();
    if (string.IsNullOrEmpty(searchTerm))
    {
        MessageBox.Show("请输入搜索词!");
        return;
    }

    const int DisplayedColumns = 11; // number of leading columns shown for header and match
    Stopwatch stopwatch = new Stopwatch();
    bool found = false;

    try
    {
        excelApp = new EXCEL.Application();
        excelWorkbook = excelApp.Workbooks.Open(filePath);
        excelWorksheet = (EXCEL.Worksheet)excelWorkbook.Sheets[1]; // only the first worksheet is processed
        excelRange = excelWorksheet.UsedRange;                      // the used range of that sheet

        // Each Count is a COM call; read them once instead of on every iteration.
        int rowCount = excelRange.Rows.Count;
        int columnCount = excelRange.Columns.Count;

        // The progress bar maximum is the total number of cells (clamped to int).
        progressBar1.Maximum = (int)Math.Min((long)rowCount * columnCount, int.MaxValue);
        progressBar1.Value = 0;
        progressBar1.Visible = true;

        stopwatch.Start();
        int processedCells = 0;
        for (int col = 1; col <= columnCount && !found; col++)
        {
            for (int row = 1; row <= rowCount && !found; row++)
            {
                if (processedCells < progressBar1.Maximum)
                {
                    processedCells++;
                }
                progressBar1.Value = processedCells;

                EXCEL.Range cell = excelRange.Cells[row, col] as EXCEL.Range;
                if (cell == null)
                {
                    continue;
                }

                string cellText = Convert.ToString(cell.Text);
                if (cellText == null || !cellText.Contains(searchTerm))
                {
                    continue;
                }

                // Pair every header cell (row 1) with the cell of the matching row.
                List<string> headerRow = GetRowData(1, DisplayedColumns);
                List<string> matchedRow = GetRowData(row, DisplayedColumns);
                StringBuilder result = new StringBuilder();
                int pairCount = Math.Min(headerRow.Count, matchedRow.Count);
                for (int i = 0; i < pairCount; i++)
                {
                    result.Append(headerRow[i]).Append(" : ").Append(matchedRow[i]).Append("\r\n\r\n\r\n");
                }
                txtResults.Text = result.ToString();
                found = true; // the loop conditions stop both loops after the first match
            }
        }
        stopwatch.Stop();

        if (!found)
        {
            txtResults.Text = "未找到匹配项!";
        }

        // Report the elapsed search time.
        MessageBox.Show($"查找完成!共花费了 {stopwatch.ElapsedMilliseconds} 毫秒。");
    }
    finally
    {
        // Always shut Excel down, even when opening or scanning the workbook throws.
        try
        {
            if (excelWorkbook != null)
            {
                excelWorkbook.Close(false);
            }
        }
        finally
        {
            if (excelApp != null)
            {
                excelApp.Quit();
                System.Runtime.InteropServices.Marshal.ReleaseComObject(excelApp);
            }
            excelRange = null;
            excelWorksheet = null;
            excelWorkbook = null;
            excelApp = null;
            // Collect the remaining COM wrappers (cells, sheets, ...) created during the scan.
            GC.Collect();
            GC.WaitForPendingFinalizers();
            progressBar1.Visible = false;
        }
    }
}
3. 图片文字识别(OCR)
用户可以通过按钮选择图片,并使用Tesseract OCR引擎将图片中的文字提取出来。程序支持多种语言(中文、英文等),用户可以通过RadioButton选择语言。
/// <summary>
/// Lets the user pick an image, shows it in pictureBoxImage and writes the OCR
/// text returned by PerformOCR into textBoxOCRResult.
/// </summary>
/// <param name="sender">The button that raised the click.</param>
/// <param name="e">Unused event data.</param>
private void buttonSelectImage_Click(object sender, EventArgs e)
{
    if (string.IsNullOrEmpty(textBoxFolderPath.Text.Trim()))
    {
        MessageBox.Show("请优先选择语言库路径!");
        return;
    }

    // Map the checked RadioButton to a Tesseract data-file name.
    string language = null;
    if (radioButtonChiSim.Checked)
    {
        language = "chi_sim"; // Simplified Chinese
    }
    else if (radioButtonEng.Checked)
    {
        language = "eng"; // English
    }
    else if (radioButtonOsd.Checked)
    {
        // NOTE(review): in Tesseract "osd" is normally the orientation/script-detection
        // data rather than a language - confirm this is the intended choice.
        language = "osd";
    }

    if (language == null)
    {
        MessageBox.Show("请选择翻译语言!");
        return;
    }

    string imagePath;
    // The dialog is IDisposable; release it as soon as the path is known.
    using (OpenFileDialog openFileDialog = new OpenFileDialog
    {
        Filter = "Image Files|*.jpg;*.jpeg;*.png;*.bmp;*.gif",
        Title = "Select an Image"
    })
    {
        if (openFileDialog.ShowDialog() != DialogResult.OK)
        {
            return;
        }
        imagePath = openFileDialog.FileName;
    }

    Bitmap bitmap;
    try
    {
        bitmap = new Bitmap(imagePath);
    }
    catch (Exception ex)
    {
        // A corrupt or unsupported file must not crash the handler.
        MessageBox.Show($"加载图片失败: {ex.Message}");
        return;
    }

    // Swap in the new image, then dispose the previous one so its GDI+ handle is released.
    System.Drawing.Image previousImage = pictureBoxImage.Image;
    pictureBoxImage.SizeMode = PictureBoxSizeMode.Zoom;
    pictureBoxImage.Image = bitmap;
    if (previousImage != null)
    {
        previousImage.Dispose();
    }

    // Show the recognition result in the text box.
    textBoxOCRResult.Text = PerformOCR(imagePath, textBoxFolderPath.Text, language);
}
OCR功能的核心代码如下:
/// <summary>
/// Runs Tesseract OCR over an image file and returns the recognized text.
/// </summary>
/// <param name="imagePath">Path of the image file to read.</param>
/// <param name="tessDataPath">Folder handed to the engine as its data path.</param>
/// <param name="language">Language name handed to the engine (for example "eng").</param>
/// <returns>The recognized text, or an empty string when any step throws.</returns>
private string PerformOCR(string imagePath, string tessDataPath, string language)
{
    try
    {
        // Stacked usings dispose page, image and engine in reverse order of creation.
        using (var engine = new TesseractEngine(tessDataPath, language, EngineMode.Default))
        using (var image = Pix.LoadFromFile(imagePath))
        using (var page = engine.Process(image))
        {
            // Hand the recognized text back to the caller.
            return page.GetText();
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show($"OCR失败: {ex.Message}");
        return string.Empty;
    }
}
4. 进度条与异步操作
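在查找Excel文件时,程序会使用进度条显示查找进度。通过更新进度条的值,用户可以直观地看到查找的进度。需要注意的是,下面的循环是在UI线程上同步执行的,并不是真正的异步操作;当数据量较大时界面可能暂时无响应,此时可以考虑使用 async/await 或 BackgroundWorker 将查找过程放到后台线程中执行。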
// Excerpt: drive the progress bar while visiting every cell of the used range.
progressBar1.Maximum = totalCells; // one progress step per cell
progressBar1.Value = 0;
progressBar1.Visible = true;       // make sure the bar is shown

for (int col = 1; col <= excelRange.Columns.Count; col++)
{
    for (int row = 1; row <= excelRange.Rows.Count; row++)
    {
        // Count the cell and publish the new position in one step.
        progressBar1.Value = ++processedCells;
    }
}
项目总结
通过这个项目,我们实现了一个功能强大的查询工具,集成了关键字查找、Excel题目导入和图片文字识别功能。以下是本项目的亮点:
- 高效查找:通过遍历Excel文件,快速找到包含关键字的题目内容。
- OCR支持:使用Tesseract OCR引擎,支持多种语言的文字识别。
- 用户友好:通过进度条和异步操作,提升用户体验。
如果你对C#开发感兴趣,或者想提升自己的数据处理能力,不妨动手实现这个项目!通过实践,你将掌握更多编程技巧,同时也能打造一款属于自己的实用工具。
下一步计划
- 支持更多文件格式:扩展支持PDF、Word等文件格式的查找功能。
- 优化OCR性能:通过多线程或GPU加速,提升OCR的识别速度。
- 增加语音输入:集成语音识别功能,支持通过语音输入关键字。
- 美化界面:优化界面设计,提升用户体验。
希望本文能激发你对编程的兴趣,期待你在评论区分享你的学习心得和项目成果!🚀
以下为程序源代码:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Drawing.Imaging;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using Microsoft.Office.Interop.Excel;
using System.IO;
using System.Runtime.InteropServices;
using EXCEL = Microsoft.Office.Interop.Excel;
using static System.Windows.Forms.VisualStyles.VisualStyleElement;
using static System.Windows.Forms.VisualStyles.VisualStyleElement.Status;
using System.Diagnostics;
using Microsoft.CognitiveServices.Speech;
using X = Microsoft.CognitiveServices.Speech;
using static System.Windows.Forms.VisualStyles.VisualStyleElement.ToolBar;
using TesseractOCR.Enums;
using TesseractOCR;
using Tesseract;
namespace ExSerchApp3
{
public partial class Form1 : Form
{
// Excel COM automation objects. They are assigned in btnSearch_Click;
// excelRange is also read by GetRowData.
private EXCEL.Application excelApp;
private EXCEL.Workbook excelWorkbook;
private EXCEL.Worksheet excelWorksheet;
private EXCEL.Range excelRange;
// Speech recognizer created in ConfigSpeechRecognizer and disposed in OnFormClosing.
private SpeechRecognizer recognizer;
// Toggled by btnStopListen_Click; checked in OnFormClosing before stopping recognition.
private bool isListening = false;
// True between a left-button MouseDown and MouseUp on pictureBoxImage.
private bool dragging = false;
// NOTE(review): the unqualified declaration below was replaced by the fully qualified one,
// presumably because `Point` is ambiguous with the Excel interop namespace imported above - confirm.
// private Point dragStartPoint = new Point(0, 0);
// Drag anchor written by pictureBoxImage_MouseDown and read by pictureBoxImage_MouseMove.
private System.Drawing.Point dragStartPoint = new System.Drawing.Point(0, 0);
// Image offset updated by the drag handlers and by AdjustPictureBoxSize.
private System.Drawing.Point imageOffset = new System.Drawing.Point(0, 0);
/// <summary>
/// Creates the main window: builds the controls, applies captions, prepares
/// speech recognition, maximizes the window and wires the mouse handlers.
/// </summary>
public Form1()
{
    // The designer-generated controls must exist before anything below touches them.
    InitializeComponent();
    InitMessage();

    // Set up the speech recognizer.
    ConfigSpeechRecognizer();

    WindowState = FormWindowState.Maximized;

    // Hook up the load, mouse and track-bar handlers.
    MouseSearch();
}
/// <summary>
/// Applies the initial captions and read-only flags to the form's controls.
/// </summary>
private void InitMessage()
{
    // Button captions.
    btnImportExcel.Text = "选择题目库";
    btnSearch.Text = "查找结果";
    btnClear.Text = "清空内容";
    // isListening starts as false, so the toggle button must offer "start listening";
    // btnStopListen_Click switches the caption to "停止监听" once recognition begins.
    btnStopListen.Text = "开始监听";
    buttonSelectFolder.Text = "选择语言库";
    buttonSelectImage.Text = "加载图片";

    // Version and title labels.
    V_label1.Text = "V1.0";
    Tittle_label1.Text = "查询工具";

    // Path and result boxes are display-only; the search term and OCR text stay editable.
    textBox1.ReadOnly = true;
    txtSearchTerm.ReadOnly = false;
    txtResults.ReadOnly = true;
    textBoxOCRResult.ReadOnly = false;
    textBoxFolderPath.ReadOnly = true;
}
/// <summary>
/// Subscribes the form-load, picture-box mouse and track-bar scroll handlers.
/// </summary>
private void MouseSearch()
{
    Load += Form1_Load;

    // Drag handling on the picture box.
    pictureBoxImage.MouseDown += pictureBoxImage_MouseDown;
    pictureBoxImage.MouseMove += pictureBoxImage_MouseMove;
    pictureBoxImage.MouseUp += pictureBoxImage_MouseUp;

    // Resizing through the track bar.
    trackBar1.Scroll += TrackBar1_Scroll;
}
/// <summary>
/// Starts a drag when the left button is pressed and records the press
/// position relative to the current image offset.
/// </summary>
/// <param name="sender">The picture box that raised the event.</param>
/// <param name="e">Mouse button and position.</param>
private void pictureBoxImage_MouseDown(object sender, MouseEventArgs e)
{
    if (e.Button != MouseButtons.Left)
    {
        return;
    }

    dragging = true;
    // Press location shifted back by the current image offset.
    dragStartPoint = new System.Drawing.Point(e.X - imageOffset.X, e.Y - imageOffset.Y);
}
/// <summary>
/// While a drag is active, recomputes the image offset from the mouse position,
/// the drag anchor and half of the picture box's client size, then requests a repaint.
/// </summary>
/// <param name="sender">The picture box that raised the event.</param>
/// <param name="e">Current mouse position.</param>
private void pictureBoxImage_MouseMove(object sender, MouseEventArgs e)
{
    if (!dragging)
    {
        return;
    }

    System.Drawing.Size viewport = pictureBoxImage.ClientSize;
    int offsetX = e.X + dragStartPoint.X - viewport.Width / 2;
    int offsetY = e.Y + dragStartPoint.Y - viewport.Height / 2;
    imageOffset = new System.Drawing.Point(offsetX, offsetY);

    // Trigger a repaint.
    pictureBoxImage.Invalidate();
}
/// <summary>
/// Ends the drag when the left mouse button is released.
/// </summary>
/// <param name="sender">The picture box that raised the event.</param>
/// <param name="e">Mouse button that was released.</param>
private void pictureBoxImage_MouseUp(object sender, MouseEventArgs e)
{
    if (e.Button != MouseButtons.Left)
    {
        return;
    }

    dragging = false;
}
/// <summary>
/// Resizes the picture box whenever the track bar is scrolled.
/// </summary>
/// <param name="sender">The track bar that raised the event.</param>
/// <param name="e">Unused event data.</param>
private void TrackBar1_Scroll(object sender, EventArgs e) => AdjustPictureBoxSize();
/// <summary>
/// Resizes pictureBoxImage to the image size scaled by the track-bar value
/// (read as a percentage), recomputes imageOffset and requests a repaint.
/// Does nothing when no image is loaded.
/// </summary>
private void AdjustPictureBoxSize()
{
    if (pictureBoxImage.Image == null)
    {
        return;
    }

    int percent = trackBar1.Value;
    int width = (int)(pictureBoxImage.Image.Width * percent / 100.0);
    int height = (int)(pictureBoxImage.Image.Height * percent / 100.0);
    pictureBoxImage.ClientSize = new System.Drawing.Size(width, height);

    // Recompute the offset used to keep the picture centred.
    // NOTE(review): this uses the unscaled image size against the resized control - confirm intent.
    imageOffset = new System.Drawing.Point(
        (pictureBoxImage.Width - pictureBoxImage.Image.Width) / 2,
        (pictureBoxImage.Height - pictureBoxImage.Image.Height) / 2);

    // The former PointToScreen(imageOffset) call was dropped: its return value
    // was discarded, so it had no effect on imageOffset or on the control.

    // Trigger a repaint.
    pictureBoxImage.Invalidate();
}
/// <summary>
/// Creates the speech recognizer and appends every recognized phrase, followed
/// by a line break, to txtSearchTerm.
/// </summary>
private void ConfigSpeechRecognizer()
{
    // Azure Cognitive Services credentials: replace the placeholders with a real
    // subscription key and service region before using speech input.
    var config = SpeechConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");

    // Create the recognizer exactly once; a second instance would orphan the
    // first one without ever disposing it.
    recognizer = new SpeechRecognizer(config);

    recognizer.Recognized += (sender, e) =>
    {
        if (e.Result.Reason != ResultReason.RecognizedSpeech)
        {
            return;
        }

        string recognizedText = e.Result.Text + Environment.NewLine;
        if (txtSearchTerm.IsDisposed)
        {
            return;
        }

        // Recognition callbacks may arrive on a non-UI thread; WinForms controls
        // must only be modified on the thread that created them.
        if (txtSearchTerm.InvokeRequired)
        {
            txtSearchTerm.BeginInvoke(new Action(() => txtSearchTerm.Text += recognizedText));
        }
        else
        {
            txtSearchTerm.Text += recognizedText;
        }
    };
}
/// <summary>
/// Toggles continuous speech recognition and keeps the button caption in sync
/// with the listening state.
/// </summary>
/// <param name="sender">The button that raised the click.</param>
/// <param name="e">Unused event data.</param>
private async void btnStopListen_Click(object sender, EventArgs e)
{
    // Block further clicks while the start/stop call is still pending, so the
    // handler cannot be re-entered with a half-updated state.
    btnStopListen.Enabled = false;
    try
    {
        if (!isListening)
        {
            try
            {
                await recognizer.StartContinuousRecognitionAsync();
                // Flip state and caption only after recognition really started;
                // a failed start must not leave the form believing it is listening.
                isListening = true;
                btnStopListen.Text = "停止监听";
            }
            catch (Exception ex)
            {
                MessageBox.Show($"错误: {ex.Message}");
            }
        }
        else
        {
            // As before, the form leaves the listening state even if the stop call fails.
            btnStopListen.Text = "开始监听";
            isListening = false;
            try
            {
                await recognizer.StopContinuousRecognitionAsync();
            }
            catch (Exception ex)
            {
                MessageBox.Show($"错误: {ex.Message}");
            }
        }
    }
    finally
    {
        btnStopListen.Enabled = true;
    }
}
/// <summary>
/// Stops recognition (when active) and disposes the recognizer once the form
/// is really closing.
/// </summary>
/// <param name="e">Closing event data; <c>e.Cancel</c> is honoured.</param>
protected override void OnFormClosing(FormClosingEventArgs e)
{
    base.OnFormClosing(e);

    // A FormClosing subscriber may have vetoed the close; the form then stays
    // open and still needs a working recognizer.
    if (e.Cancel || recognizer == null)
    {
        return;
    }

    try
    {
        if (isListening)
        {
            // Blocking wait: this override cannot await.
            recognizer.StopContinuousRecognitionAsync().Wait();
            isListening = false;
        }
    }
    catch (Exception ex)
    {
        // A failing stop must not prevent the window from closing.
        System.Diagnostics.Debug.WriteLine($"Stopping speech recognition failed: {ex.Message}");
    }
    finally
    {
        recognizer.Dispose();
        recognizer = null;
    }
}
// Load handler subscribed in MouseSearch; intentionally empty.
private void Form1_Load(object sender, EventArgs e)
{
}
/// <summary>
/// Lets the user pick an Excel workbook and shows the chosen path in textBox1.
/// </summary>
/// <param name="sender">The button that raised the click.</param>
/// <param name="e">Unused event data.</param>
private void btnImportExcel_Click(object sender, EventArgs e)
{
    try
    {
        // OpenFileDialog is IDisposable; dispose it the same way
        // buttonSelectFolder_Click disposes its FolderBrowserDialog.
        using (OpenFileDialog openFileDialog = new OpenFileDialog
        {
            Filter = "Excel Files|*.xls;*.xlsx",
            Title = "选择Excel文件"
        })
        {
            if (openFileDialog.ShowDialog() == DialogResult.OK)
            {
                textBox1.Text = openFileDialog.FileName;
            }
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show($"打开Excel文件时发生错误: {ex.Message}", "错误", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}
/// <summary>
/// Searches the first worksheet of the workbook named in textBox1 for the first
/// cell whose displayed text contains the search term, then shows the header row
/// paired with the matching row in txtResults.
/// </summary>
/// <param name="sender">The button that raised the click.</param>
/// <param name="e">Unused event data.</param>
private void btnSearch_Click(object sender, EventArgs e)
{
    // Validate all input BEFORE starting Excel, so an early return cannot
    // leave an orphaned Excel process behind.
    string filePath = textBox1.Text;
    if (string.IsNullOrEmpty(filePath.Trim()))
    {
        MessageBox.Show("请优先选择题库路径!");
        return;
    }

    string searchTerm = txtSearchTerm.Text.Trim();
    if (string.IsNullOrEmpty(searchTerm))
    {
        MessageBox.Show("请输入搜索词!");
        return;
    }

    const int DisplayedColumns = 11; // number of leading columns shown for header and match
    Stopwatch stopwatch = new Stopwatch();
    bool found = false;
    bool failed = false;

    try
    {
        excelApp = new EXCEL.Application();
        excelWorkbook = excelApp.Workbooks.Open(filePath);
        excelWorksheet = (EXCEL.Worksheet)excelWorkbook.Sheets[1]; // only the first worksheet is processed
        excelRange = excelWorksheet.UsedRange;                      // the used range of that sheet

        // Each Count is a COM call; read them once instead of on every iteration.
        int rowCount = excelRange.Rows.Count;
        int columnCount = excelRange.Columns.Count;

        // The progress bar maximum is the total number of cells; computed as long
        // and clamped because rows * columns can exceed int.MaxValue.
        progressBar1.Maximum = (int)Math.Min((long)rowCount * columnCount, int.MaxValue);
        progressBar1.Value = 0;
        progressBar1.Visible = true;

        stopwatch.Start();
        int processedCells = 0;
        for (int col = 1; col <= columnCount && !found; col++)
        {
            for (int row = 1; row <= rowCount && !found; row++)
            {
                if (processedCells < progressBar1.Maximum)
                {
                    processedCells++;
                }
                progressBar1.Value = processedCells;

                EXCEL.Range cell = excelRange.Cells[row, col] as EXCEL.Range;
                if (cell == null)
                {
                    continue;
                }

                string cellText = Convert.ToString(cell.Text);
                if (cellText == null || !cellText.Contains(searchTerm))
                {
                    continue;
                }

                // Pair every header cell (row 1) with the cell of the matching row.
                List<string> headerRow = GetRowData(1, DisplayedColumns);
                List<string> matchedRow = GetRowData(row, DisplayedColumns);
                StringBuilder result = new StringBuilder();
                int pairCount = Math.Min(headerRow.Count, matchedRow.Count);
                for (int i = 0; i < pairCount; i++)
                {
                    result.Append(headerRow[i]).Append(" : ").Append(matchedRow[i]).Append("\r\n\r\n\r\n");
                }
                txtResults.Text = result.ToString();
                found = true; // the loop conditions stop both loops after the first match
            }
        }
    }
    catch (Exception ex)
    {
        failed = true;
        MessageBox.Show($"查找时发生错误: {ex.Message}", "错误", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
    finally
    {
        stopwatch.Stop();
        // Shut Excel down before any dialog is shown so the process never lingers.
        ReleaseExcelResources();
        progressBar1.Visible = false;
    }

    if (failed)
    {
        return;
    }

    if (!found)
    {
        txtResults.Text = "未找到匹配项!";
    }

    // Report the elapsed search time.
    MessageBox.Show($"查找完成!共花费了 {stopwatch.ElapsedMilliseconds} 毫秒。");
}

// Closes the workbook, quits Excel and releases every COM reference held in the fields.
private void ReleaseExcelResources()
{
    try
    {
        if (excelWorkbook != null)
        {
            excelWorkbook.Close(false);
        }
    }
    catch (Exception ex)
    {
        // Best-effort teardown: keep going so Quit still runs.
        System.Diagnostics.Debug.WriteLine($"Closing the workbook failed: {ex.Message}");
    }

    try
    {
        if (excelApp != null)
        {
            excelApp.Quit();
        }
    }
    catch (Exception ex)
    {
        System.Diagnostics.Debug.WriteLine($"Quitting Excel failed: {ex.Message}");
    }

    foreach (object comObject in new object[] { excelRange, excelWorksheet, excelWorkbook, excelApp })
    {
        if (comObject != null && System.Runtime.InteropServices.Marshal.IsComObject(comObject))
        {
            System.Runtime.InteropServices.Marshal.ReleaseComObject(comObject);
        }
    }

    excelRange = null;
    excelWorksheet = null;
    excelWorkbook = null;
    excelApp = null;

    // Intermediate COM wrappers (cells, sheets, ...) created during the scan are
    // only released when finalized; collecting here lets the Excel process exit.
    GC.Collect();
    GC.WaitForPendingFinalizers();
}
// Click handler for progressBar1; intentionally empty.
private void progressBar1_Click(object sender, EventArgs e)
{
}
/// <summary>
/// Collects the displayed text of the first <paramref name="numColumns"/> cells
/// of one row of excelRange.
/// </summary>
/// <param name="row">1-based row index inside excelRange.</param>
/// <param name="numColumns">Number of leading columns to read.</param>
/// <returns>The cell texts in column order.</returns>
private List<string> GetRowData(int row, int numColumns)
{
    var cells = new List<string>();
    int column = 1;
    while (column <= numColumns)
    {
        cells.Add(excelRange.Cells[row, column].Text);
        column++;
    }
    return cells;
}
/// <summary>
/// Empties the search-term box and the result box.
/// </summary>
/// <param name="sender">The button that raised the click.</param>
/// <param name="e">Unused event data.</param>
private void btnClear_Click(object sender, EventArgs e)
{
    // Search term first, then the results, in the same order as before.
    this.txtSearchTerm.Clear();
    this.txtResults.Clear();
}
/// <summary>
/// Lets the user pick an image, shows it in pictureBoxImage and writes the OCR
/// text returned by PerformOCR into textBoxOCRResult.
/// </summary>
/// <param name="sender">The button that raised the click.</param>
/// <param name="e">Unused event data.</param>
private void buttonSelectImage_Click(object sender, EventArgs e)
{
    if (string.IsNullOrEmpty(textBoxFolderPath.Text.Trim()))
    {
        MessageBox.Show("请优先选择语言库路径!");
        return;
    }

    // Map the checked RadioButton to a Tesseract data-file name.
    string language = null;
    if (radioButtonChiSim.Checked)
    {
        language = "chi_sim";
    }
    else if (radioButtonEng.Checked)
    {
        language = "eng";
    }
    else if (radioButtonOsd.Checked)
    {
        // NOTE(review): in Tesseract "osd" is normally the orientation/script-detection
        // data rather than a language - confirm this is the intended choice.
        language = "osd";
    }

    if (language == null)
    {
        MessageBox.Show("请选择翻译语言!");
        return;
    }

    string imagePath;
    // The dialog is IDisposable; release it as soon as the path is known.
    using (OpenFileDialog openFileDialog = new OpenFileDialog
    {
        Filter = "Image Files|*.jpg;*.jpeg;*.png;*.bmp;*.gif",
        Title = "Select an Image"
    })
    {
        if (openFileDialog.ShowDialog() != DialogResult.OK)
        {
            return;
        }
        imagePath = openFileDialog.FileName;
    }

    Bitmap bitmap;
    try
    {
        bitmap = new Bitmap(imagePath);
    }
    catch (Exception ex)
    {
        // A corrupt or unsupported file must not crash the handler.
        MessageBox.Show($"加载图片失败: {ex.Message}");
        return;
    }

    // Swap in the new image, then dispose the previous one so its GDI+ handle is
    // released instead of accumulating with every load.
    System.Drawing.Image previousImage = pictureBoxImage.Image;
    pictureBoxImage.SizeMode = PictureBoxSizeMode.Zoom;
    pictureBoxImage.Image = bitmap;
    if (previousImage != null)
    {
        previousImage.Dispose();
    }

    // Show the recognition result in the text box.
    textBoxOCRResult.Text = PerformOCR(imagePath, textBoxFolderPath.Text, language);
}
/// <summary>
/// Runs Tesseract OCR over an image file and returns the recognized text.
/// </summary>
/// <param name="imagePath">Path of the image file to read.</param>
/// <param name="tessDataPath">Folder handed to the engine as its data path.</param>
/// <param name="language">Language name handed to the engine (for example "eng").</param>
/// <returns>The recognized text, or an empty string when any step throws.</returns>
private string PerformOCR(string imagePath, string tessDataPath,string language)
{
    try
    {
        // Pass the engine mode as the enum value, not as its name in a string.
        // The name is fully qualified because this file also imports
        // TesseractOCR.Enums, so a bare EngineMode could be ambiguous.
        using (var ocrEngine = new TesseractEngine(tessDataPath, language, Tesseract.EngineMode.Default))
        using (var img = Pix.LoadFromFile(imagePath))
        using (var page = ocrEngine.Process(img))
        {
            return page.GetText();
        }
    }
    catch (Exception ex)
    {
        // Report the failure to the user and fall back to an empty result.
        MessageBox.Show($"OCR failed: {ex.Message}");
        return string.Empty;
    }
}
/// <summary>
/// Lets the user pick a folder and shows its path in textBoxFolderPath.
/// </summary>
/// <param name="sender">The button that raised the click.</param>
/// <param name="e">Unused event data.</param>
private void buttonSelectFolder_Click(object sender, EventArgs e)
{
    using (var dialog = new FolderBrowserDialog())
    {
        if (dialog.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        // Display the folder the user picked.
        textBoxFolderPath.Text = dialog.SelectedPath;
    }
}
}
}