OCR识别过程中，调用文档方向检测函数

中安OCR人工智能

于 2024-10-11 14:13:11 发布

阅读量427

点赞数 13

文章标签： ocr 算法

本文链接：https://blog.csdn.net/weixin_72039842/article/details/142852504

版权

在OCR开发过程中，比较头疼的是，收到的待识别文件中文本形态各式各样：倾斜、模糊、中英文混排等；其中文字倒置（文档颠倒）也是一种比较复杂的识别情形。

我们以中文识别为例，判断中文文档是否颠倒的算法使用了一个简单的重心检测，它依赖于计算图像的重心位置，并假设颠倒的文档其重心会发生显著变化。然而，中文字符的上下对称性可能会导致重心检测并不总是可靠。因此，我们结合撇捺方向性检测和上下结构检测，以提高对文档是否颠倒的识别精度。

算法优化步骤

重心检测：继续使用重心检测来判断整体字符的重心分布，尤其是当重心明显靠下时，可能表示文档颠倒。
撇捺方向检测：中文的撇和捺在方向上具有显著差异。我们通过统计撇（从右上到左下的斜线）和捺（从左上到右下的斜线）的数量差异，来帮助判断是否颠倒。
上下结构检测：中文字符中的上下结构（例如“家”、“票”等）在颠倒后会产生明显变化，通过分析字符中部的笔画密度来判断是否倒置。

具体代码

#include <windows.h>
#include <gdiplus.h>
#include <commdlg.h> // 用于打开文件对话框
#include <iostream>
#include <vector>
#include <cmath>

#pragma comment (lib,"Gdiplus.lib")

using namespace Gdiplus;
using namespace std;

// Global variables
// NOTE(review): hInst is never assigned in the visible code; presumably it is
// set to the application instance handle at startup — confirm.
HINSTANCE hInst;
// Wide-string title text; presumably the main window caption — confirm at the
// window-creation call site.
LPCWSTR szTitle = L"中文文档方向检测";
// Wide-string class name; presumably used when registering the window class —
// confirm.
LPCWSTR szWindowClass = L"DocumentDirectionApp";
// Pointer to the currently loaded image; starts out null. Who allocates and
// frees it is not visible in this part of the file.
Gdiplus::Bitmap* loadedImage = nullptr;
// Starts out false; presumably holds the most recent orientation verdict —
// confirm against the code that writes it.
bool isInverted = false;

// Menu command ID for the "exit" menu item
#define IDM_EXIT 1000

// Starts GDI+ with a default-constructed startup input.
//
// Parameters:
//   gdiplusToken - receives the token written by GdiplusStartup; pass the
//                  same token to ShutdownGDIPlus later.
// The status returned by GdiplusStartup is not inspected.
void InitGDIPlus(ULONG_PTR &gdiplusToken) {
    Gdiplus::GdiplusStartupInput startupInput;
    Gdiplus::GdiplusStartup(&gdiplusToken, &startupInput, nullptr);
}

// Shuts GDI+ down.
//
// Parameters:
//   gdiplusToken - the token that was filled in by InitGDIPlus.
void ShutdownGDIPlus(ULONG_PTR &gdiplusToken) {
    Gdiplus::GdiplusShutdown(gdiplusToken);
}

// Shows the common "Open" dialog and lets the user pick an image file.
//
// The wide-character (W) API variants are named explicitly: the path buffer
// and the filter are wide strings, so the TCHAR-dependent OPENFILENAME /
// GetOpenFileName aliases would only compile when UNICODE is defined.
//
// Parameters:
//   hwnd - owner window of the dialog.
// Returns:
//   The selected file path, or an empty string when GetOpenFileNameW reports
//   failure (the user cancelled or the dialog could not be completed).
std::wstring OpenFileDialog(HWND hwnd) {
    wchar_t szFile[MAX_PATH] = { 0 };

    OPENFILENAMEW ofn = { 0 };
    ofn.lStructSize = sizeof(ofn);
    ofn.hwndOwner = hwnd;
    ofn.lpstrFile = szFile;
    // nMaxFile is a character count, not a byte count.
    ofn.nMaxFile = static_cast<DWORD>(sizeof(szFile) / sizeof(szFile[0]));
    // Pairs of "description\0pattern\0"; the literal's implicit terminator
    // supplies the final extra null that ends the list.
    ofn.lpstrFilter = L"Image Files\0*.bmp;*.jpg;*.jpeg;*.png\0";
    ofn.nFilterIndex = 1;
    ofn.lpstrFileTitle = NULL;
    ofn.nMaxFileTitle = 0;
    ofn.lpstrInitialDir = NULL;
    ofn.Flags = OFN_PATHMUSTEXIST | OFN_FILEMUSTEXIST;

    if (GetOpenFileNameW(&ofn)) {
        return std::wstring(szFile);
    }

    return std::wstring();
}

// Creates a heap-allocated GDI+ bitmap from the file at filePath.
//
// Parameters:
//   filePath - path of the image file to open.
// Returns:
//   A bitmap allocated with new; this function does not release it. The
//   bitmap's load status is not checked here.
Gdiplus::Bitmap* LoadImageFromFile(const std::wstring& filePath) {
    const wchar_t* rawPath = filePath.c_str();
    Gdiplus::Bitmap* image = new Gdiplus::Bitmap(rawPath);
    return image;
}

// Computes the mean y coordinate of all "ink" pixels in the image.
//
// A pixel counts as ink when the average of its R, G and B channels is below
// 128. y is the row coordinate passed to GetPixel, so a larger result means
// the ink sits at larger row indices.
//
// Parameters:
//   bmp - image to scan; dereferenced unconditionally, so it must not be null.
// Returns:
//   The average y of the ink pixels, or height / 2.0 when no pixel qualifies.
double CalculateCenterOfMass(Gdiplus::Bitmap* bmp) {
    const int width = bmp->GetWidth();
    const int height = bmp->GetHeight();

    // 64-bit accumulators: on a large, ink-heavy scan the sum of row indices
    // exceeds what a 32-bit int can hold (e.g. 4000 x 4000 dark pixels sum to
    // roughly 3.2e10), and signed overflow is undefined behaviour.
    long long sumY = 0;
    long long count = 0;

    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            Gdiplus::Color color;
            bmp->GetPixel(x, y, &color);
            // Plain channel average used as the grey level.
            const int intensity = (color.GetR() + color.GetG() + color.GetB()) / 3;
            if (intensity < 128) { // darker than mid-grey is treated as text
                sumY += y;
                ++count;
            }
        }
    }

    if (count == 0) {
        // No ink found: report the vertical middle of the image.
        return height / 2.0;
    }
    return static_cast<double>(sumY) / static_cast<double>(count);
}

// Measures how unbalanced the two diagonal directions are among ink pixels.
//
// A pixel is ink when the average of its R, G and B channels is below 128.
// For every interior ink pixel (the one-pixel border is skipped so all four
// diagonal neighbours exist) two checks are made:
//   - upper-left and lower-right neighbours are both ink;
//   - upper-right and lower-left neighbours are both ink.
// Each check that succeeds increments its own counter.
//
// Parameters:
//   bmp - image to scan; dereferenced unconditionally, so it must not be null.
// Returns:
//   The absolute difference between the two counters.
double CalculateStrokeOrientation(Gdiplus::Bitmap* bmp) {
    const int cols = bmp->GetWidth();
    const int rows = bmp->GetHeight();

    // True when the pixel at (px, py) is darker than mid-grey.
    auto isInk = [bmp](int px, int py) {
        Gdiplus::Color c;
        bmp->GetPixel(px, py, &c);
        return (c.GetR() + c.GetG() + c.GetB()) / 3 < 128;
    };

    int downRightHits = 0; // upper-left .. lower-right diagonal
    int downLeftHits = 0;  // upper-right .. lower-left diagonal

    for (int row = 1; row + 1 < rows; ++row) {
        for (int col = 1; col + 1 < cols; ++col) {
            if (!isInk(col, row)) {
                continue;
            }
            if (isInk(col - 1, row - 1) && isInk(col + 1, row + 1)) {
                ++downRightHits;
            }
            if (isInk(col + 1, row - 1) && isInk(col - 1, row + 1)) {
                ++downLeftHits;
            }
        }
    }

    return static_cast<double>(std::abs(downRightHits - downLeftHits));
}

// Builds a new bitmap that is the input rotated by 180 degrees.
//
// Every source pixel (x, y) is written to (width - 1 - x, height - 1 - y) of
// a freshly allocated bitmap with the same width and height.
//
// Parameters:
//   bmp - source image; dereferenced unconditionally, so it must not be null.
// Returns:
//   The rotated copy, allocated with new; this function does not release it.
Gdiplus::Bitmap* RotateImage180(Gdiplus::Bitmap* bmp) {
    const int cols = bmp->GetWidth();
    const int rows = bmp->GetHeight();
    Gdiplus::Bitmap* flipped = new Gdiplus::Bitmap(cols, rows);

    const int lastCol = cols - 1;
    const int lastRow = rows - 1;

    for (int srcRow = 0; srcRow < rows; ++srcRow) {
        const int dstRow = lastRow - srcRow;
        for (int srcCol = 0; srcCol < cols; ++srcCol) {
            Gdiplus::Color pixel;
            bmp->GetPixel(srcCol, srcRow, &pixel);
            flipped->SetPixel(lastCol - srcCol, dstRow, pixel);
        }
    }

    return flipped;
}

// Decides whether the image looks upside down.
//
// The image and a copy rotated by 180 degrees are both measured with
// CalculateCenterOfMass and CalculateStrokeOrientation. The image is reported
// as inverted when its ink centre has a larger y than the rotated copy's by
// more than the threshold, or when the stroke measures differ by more than
// the threshold.
//
// The centre-of-mass comparison is signed on purpose. Taking the absolute
// value discards the direction of the shift, so an image and its own 180
// degree rotation would always receive the same verdict and the check could
// not tell upright from inverted.
//
// Parameters:
//   bmp - image to test; a null pointer yields false.
// Returns:
//   true when the image is judged to be upside down, false otherwise.
bool IsImageInverted(Gdiplus::Bitmap* bmp) {
    if (bmp == nullptr) {
        return false;
    }

    // Thresholds are empirical and may need tuning per data set.
    const double kMassThreshold = 50.0;
    const double kStrokeThreshold = 50.0;

    // Measurements on the image as given.
    const double centerOfMassNormal = CalculateCenterOfMass(bmp);
    const double strokeOrientationNormal = CalculateStrokeOrientation(bmp);

    // Same measurements on the copy rotated by 180 degrees.
    Gdiplus::Bitmap* rotatedBmp = RotateImage180(bmp);
    const double centerOfMassRotated = CalculateCenterOfMass(rotatedBmp);
    const double strokeOrientationRotated = CalculateStrokeOrientation(rotatedBmp);
    delete rotatedBmp; // the rotated copy is only needed for the measurements

    // Positive when the ink centre has a larger y in the original than in
    // the rotated copy.
    const double massShift = centerOfMassNormal - centerOfMassRotated;

    // NOTE(review): the stroke measure counts symmetric diagonal neighbour
    // pairs, which a 180 degree rotation appears to leave unchanged, so this
    // term is expected to stay at 0 — confirm and consider a directional
    // stroke feature instead.
    const double strokeDiff = std::fabs(strokeOrientationNormal - strokeOrientationRotated);

    return (massShift > kMassThreshold) || (strokeDiff > kStrokeThreshold);
}

// Draws the bitmap onto the given device context.
//
// The image is drawn with its top-left corner at (10, 10) using the bitmap's
// own width and height. Nothing is drawn when bmp is null.
//
// Parameters:
//   hdc - device context to draw on.
//   bmp - image to draw; may be null.
void DisplayImage(HDC hdc, Gdiplus::Bitmap* bmp) {
    if (!bmp) {
        return;
    }

    Gdiplus::Graphics canvas(hdc);
    canvas.DrawImage(bmp, 10, 10, bmp->GetWidth(), bmp->GetHeight());
}

// 窗口过程函数
LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam) {
PAINTSTRUCT ps;
抛砖引玉，以示启发，需要哥们支持的，联系support@sinosecu.com.cn