【图片识别工具】如何批量识别多张图片指定区域的文字并据此重命名,或将多个区域的内容组合导出为表格:基于 WPF 和京东 OCR 的实现方案

应用场景

该应用适用于需要批量处理图片文字识别的场景,典型场景包括:

  • 电商商品图片批量信息提取(如价格、型号、规格)
  • 文档档案管理系统中的文字内容提取
  • 批量表单数据录入(如订单、发票信息提取)
  • 图片素材批量重命名(基于图片内文字内容)

界面设计

建议设计以下功能区域:

  1. 顶部菜单栏:包含文件操作(打开、保存)、设置(京东 OCR 配置)等
  2. 左侧文件管理区:展示已加载的图片文件列表
  3. 中间预览区:显示当前选中图片及识别区域标记
  4. 右侧配置区:
    • 京东 OCR 服务配置(App Key、App Secret、API 地址等)
    • 识别区域设置(可添加 / 删除 / 调整识别区域)
    • 命名规则设置(基于识别内容的文件名模板)
  5. 底部状态栏:显示处理进度、结果统计和操作按钮

详细代码步骤

下面是基于 WPF 和京东 OCR 的实现方案:

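// 1. Create a WPF application project and add the required references
// References: Newtonsoft.Json, System.Drawing.Common, System.Net.Http
// (the code below also relies on System.Data for DataTable and on the WPF imaging types in System.Windows.Media.Imaging)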

// 2. 创建数据模型
/// <summary>
/// Connection settings for the OCR HTTP service.
/// </summary>
public class OcrConfig
{
    // Endpoint used when the caller does not supply one.
    private const string DefaultApiUrl = "https://api.jdcloud.com/v1/regions/cn-north-1/ocr/general";

    /// <summary>Application key; embedded in the credential part of the Authorization header.</summary>
    public string AppKey { get; set; }

    /// <summary>Application secret paired with <see cref="AppKey"/>.</summary>
    public string AppSecret { get; set; }

    /// <summary>URL that recognition requests are posted to.</summary>
    public string ApiUrl { get; set; } = DefaultApiUrl;
}

/// <summary>
/// One loaded image together with its recognition regions, the recognized
/// text and the file name generated from that text.
/// </summary>
/// <remarks>
/// Raises <see cref="PropertyChanged"/> for the properties that are reassigned
/// after the item is already shown in the UI (<see cref="FilePath"/>,
/// <see cref="Results"/>, <see cref="NewFileName"/>), so bound controls refresh.
/// </remarks>
public class ImageFile : INotifyPropertyChanged
{
    private string _filePath;
    private List<OcrResult> _results = new List<OcrResult>();
    private string _newFileName;

    /// <summary>Raised after <see cref="FilePath"/>, <see cref="Results"/> or <see cref="NewFileName"/> is assigned.</summary>
    public event PropertyChangedEventHandler PropertyChanged;

    /// <summary>Full path of the image on disk.</summary>
    public string FilePath
    {
        get => _filePath;
        set { _filePath = value; OnPropertyChanged(); }
    }

    /// <summary>Decoded bitmap used for preview and for cropping regions.</summary>
    public BitmapSource ImageSource { get; set; }

    /// <summary>Rectangles to recognize within <see cref="ImageSource"/>.</summary>
    // NOTE(review): this is a plain List, so adding or removing a region does not
    // notify bound controls; switching to an observable collection would change
    // the property type and is left for a separate change.
    public List<OcrRegion> Regions { get; set; } = new List<OcrRegion>();

    /// <summary>Recognized text, one entry per processed region.</summary>
    public List<OcrResult> Results
    {
        get => _results;
        set { _results = value; OnPropertyChanged(); }
    }

    /// <summary>File name (without extension) generated from the recognition results.</summary>
    public string NewFileName
    {
        get => _newFileName;
        set { _newFileName = value; OnPropertyChanged(); }
    }

    private void OnPropertyChanged([CallerMemberName] string propertyName = null)
    {
        PropertyChanged?.Invoke(this, new PropertyChangedEventArgs(propertyName));
    }
}

/// <summary>
/// A named rectangle inside an image whose contents are sent for recognition.
/// </summary>
public class OcrRegion
{
    /// <summary>Left edge of the rectangle, as passed to the crop rectangle.</summary>
    public int X { get; set; }
    /// <summary>Top edge of the rectangle, as passed to the crop rectangle.</summary>
    public int Y { get; set; }
    /// <summary>Width of the rectangle.</summary>
    public int Width { get; set; }
    /// <summary>Height of the rectangle.</summary>
    public int Height { get; set; }
    /// <summary>
    /// Name of the region; used as the <c>{RegionName}</c> placeholder in the
    /// file name template and as the column name when exporting.
    /// </summary>
    public string RegionName { get; set; }
}

/// <summary>
/// Text recognized for a single region of an image.
/// </summary>
public class OcrResult
{
    /// <summary>The region the text was recognized from.</summary>
    public OcrRegion Region { get; set; }
    /// <summary>The text returned by the OCR service for <see cref="Region"/>.</summary>
    public string Text { get; set; }
}

// 3. 创建OCR服务类
/// <summary>
/// Crops each configured region out of an image, posts it to the configured
/// OCR endpoint and collects the recognized text.
/// </summary>
public class JdOcrService
{
    // One HttpClient is shared by every service instance: creating and
    // abandoning a client per batch leaks sockets.
    private static readonly HttpClient SharedHttpClient = new HttpClient();

    private readonly OcrConfig _config;
    private readonly HttpClient _httpClient;

    /// <summary>Creates a service that talks to the endpoint described by <paramref name="config"/>.</summary>
    /// <param name="config">Endpoint URL and credentials.</param>
    /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
    public JdOcrService(OcrConfig config)
    {
        _config = config ?? throw new ArgumentNullException(nameof(config));
        _httpClient = SharedHttpClient;
    }

    /// <summary>Builds the Authorization header value for one request.</summary>
    private string GetAuthorizationHeader()
    {
        // Implements the JD Cloud API signature authentication.
        // Reference: https://docs.jdcloud.com/cn/api-gateway/signature
        // TODO: the trailing signature value is still a placeholder and must be
        // replaced by a real HMAC-SHA256 signature before the service will accept requests.
        // The scope date is taken from the current UTC date instead of a fixed
        // literal — NOTE(review): confirm the expected date format against the signing docs.
        var scopeDate = DateTime.UtcNow.ToString("yyyyMMdd", System.Globalization.CultureInfo.InvariantCulture);
        return $"jdcloud2 request-id={Guid.NewGuid()},algorithm=HMAC-SHA256,credential={_config.AppKey}/{scopeDate}/cn-north-1/ocr/request,signed-headers=content-type;host,signature=签名值";
    }

    /// <summary>Recognizes the text of every region of <paramref name="imageFile"/>.</summary>
    /// <param name="imageFile">Image and regions to process.</param>
    /// <returns>One result per region, in region order. A region that does not overlap the image yields an empty text.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="imageFile"/> is null.</exception>
    /// <exception cref="HttpRequestException">The service returned a non-success status code.</exception>
    public async Task<List<OcrResult>> RecognizeImageRegionsAsync(ImageFile imageFile)
    {
        if (imageFile == null)
        {
            throw new ArgumentNullException(nameof(imageFile));
        }

        var results = new List<OcrResult>();

        // Iterate over a snapshot: the region list can be edited from the UI
        // while a request is being awaited.
        foreach (var region in imageFile.Regions.ToArray())
        {
            var text = string.Empty;
            var croppedImage = CropImage(imageFile.ImageSource, region);

            if (croppedImage != null)
            {
                var imageBytes = ConvertImageToBytes(croppedImage);
                text = await SendImageToOcrApiAsync(imageBytes);
            }

            results.Add(new OcrResult
            {
                Region = region,
                Text = text
            });
        }

        return results;
    }

    /// <summary>
    /// Returns the part of <paramref name="source"/> covered by <paramref name="region"/>,
    /// clamped to the bitmap bounds, or null when the two do not overlap.
    /// </summary>
    private BitmapSource CropImage(BitmapSource source, OcrRegion region)
    {
        var left = Math.Max(0, region.X);
        var top = Math.Max(0, region.Y);
        var right = Math.Min(source.PixelWidth, region.X + region.Width);
        var bottom = Math.Min(source.PixelHeight, region.Y + region.Height);

        if (right <= left || bottom <= top)
        {
            return null;
        }

        return new CroppedBitmap(source, new Int32Rect(left, top, right - left, bottom - top));
    }

    /// <summary>Encodes <paramref name="image"/> as PNG and returns the encoded bytes.</summary>
    private byte[] ConvertImageToBytes(BitmapSource image)
    {
        using (var ms = new MemoryStream())
        {
            var encoder = new PngBitmapEncoder();
            encoder.Frames.Add(BitmapFrame.Create(image));
            encoder.Save(ms);
            return ms.ToArray();
        }
    }

    /// <summary>
    /// Posts one PNG image to the OCR endpoint as multipart form data and returns
    /// the <c>data.words_result</c> part of the JSON response as a string
    /// (empty when that part is absent).
    /// </summary>
    private async Task<string> SendImageToOcrApiAsync(byte[] imageBytes)
    {
        using (var content = new MultipartFormDataContent())
        using (var request = new HttpRequestMessage(HttpMethod.Post, _config.ApiUrl))
        {
            content.Add(new ByteArrayContent(imageBytes), "image", "image.png");
            request.Content = content;

            // The header is attached per request because the client is shared and
            // the header value carries a fresh request id each time.
            request.Headers.TryAddWithoutValidation("Authorization", GetAuthorizationHeader());

            using (var response = await _httpClient.SendAsync(request))
            {
                response.EnsureSuccessStatusCode();

                var jsonResult = await response.Content.ReadAsStringAsync();
                dynamic result = JsonConvert.DeserializeObject(jsonResult);

                // Null-conditional at every step: an empty body or a response
                // without a "data" member must not throw.
                return result?.data?.words_result?.ToString() ?? string.Empty;
            }
        }
    }
}

// 4. 创建ViewModel(使用MVVM模式)
/// <summary>
/// View model of the main window: holds the loaded images, the OCR settings and
/// the file name template, and exposes the commands for loading images, editing
/// regions, running recognition, renaming files and exporting a CSV table.
/// </summary>
public class MainViewModel : INotifyPropertyChanged
{
    /// <summary>Raised when a bindable property of the view model changes.</summary>
    public event PropertyChangedEventHandler PropertyChanged;

    private OcrConfig _ocrConfig;
    private ObservableCollection<ImageFile> _imageFiles = new ObservableCollection<ImageFile>();
    private ImageFile _selectedImageFile;
    private string _fileNameTemplate = "{Region1}_{Region2}";

    /// <summary>Settings passed to the OCR service.</summary>
    public OcrConfig OcrConfig
    {
        get => _ocrConfig;
        set { _ocrConfig = value; OnPropertyChanged(); }
    }

    /// <summary>Images currently loaded into the tool.</summary>
    public ObservableCollection<ImageFile> ImageFiles
    {
        get => _imageFiles;
        set { _imageFiles = value; OnPropertyChanged(); }
    }

    /// <summary>Image currently selected in the file list; null when nothing is selected.</summary>
    public ImageFile SelectedImageFile
    {
        get => _selectedImageFile;
        set { _selectedImageFile = value; OnPropertyChanged(); }
    }

    /// <summary>
    /// Template for generated file names; each <c>{RegionName}</c> placeholder is
    /// replaced by the text recognized for the region of that name.
    /// </summary>
    public string FileNameTemplate
    {
        get => _fileNameTemplate;
        set { _fileNameTemplate = value; OnPropertyChanged(); }
    }

    // Commands
    public RelayCommand AddImageCommand { get; }
    public RelayCommand RemoveImageCommand { get; }
    public RelayCommand AddRegionCommand { get; }
    public RelayCommand RemoveRegionCommand { get; }
    public RelayCommand RecognizeAllCommand { get; }
    public RelayCommand RenameFilesCommand { get; }
    public RelayCommand ExportToTableCommand { get; }

    /// <summary>Creates the view model with placeholder credentials and wires up all commands.</summary>
    public MainViewModel()
    {
        // Initialize the configuration
        OcrConfig = new OcrConfig
        {
            AppKey = "your_app_key",
            AppSecret = "your_app_secret"
        };

        // Initialize the commands
        AddImageCommand = new RelayCommand(AddImage);
        RemoveImageCommand = new RelayCommand(RemoveImage, () => SelectedImageFile != null);
        AddRegionCommand = new RelayCommand(AddRegion, () => SelectedImageFile != null);
        RemoveRegionCommand = new RelayCommand(RemoveRegion, () => SelectedImageFile?.Regions?.Any() == true);
        RecognizeAllCommand = new RelayCommand(RecognizeAll);
        RenameFilesCommand = new RelayCommand(RenameFiles);
        ExportToTableCommand = new RelayCommand(ExportToTable);
    }

    /// <summary>Lets the user pick one or more image files and adds each one that can be decoded.</summary>
    private void AddImage()
    {
        var openFileDialog = new OpenFileDialog
        {
            Filter = "Image files (*.png;*.jpg;*.jpeg)|*.png;*.jpg;*.jpeg|All files (*.*)|*.*",
            Multiselect = true
        };

        if (openFileDialog.ShowDialog() != true)
        {
            return;
        }

        foreach (var filePath in openFileDialog.FileNames)
        {
            try
            {
                ImageFiles.Add(new ImageFile
                {
                    FilePath = filePath,
                    ImageSource = LoadImage(filePath)
                });
            }
            catch (Exception ex)
            {
                // The "All files" filter allows non-image files; report the
                // failure and continue with the remaining selection.
                MessageBox.Show($"Error loading image '{filePath}': {ex.Message}");
            }
        }
    }

    /// <summary>Decodes the first frame of the image at <paramref name="filePath"/>.</summary>
    private BitmapSource LoadImage(string filePath)
    {
        using (var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read))
        {
            // OnLoad caches the pixels at creation time, so the frame stays
            // usable after the stream is closed.
            var decoder = BitmapDecoder.Create(stream, BitmapCreateOptions.None, BitmapCacheOption.OnLoad);
            return decoder.Frames[0];
        }
    }

    /// <summary>Removes the selected image from the list, if any.</summary>
    private void RemoveImage()
    {
        if (SelectedImageFile != null)
        {
            ImageFiles.Remove(SelectedImageFile);
        }
    }

    /// <summary>Appends a default 100x50 region at the top-left corner of the selected image.</summary>
    private void AddRegion()
    {
        if (SelectedImageFile != null)
        {
            SelectedImageFile.Regions.Add(new OcrRegion
            {
                X = 0,
                Y = 0,
                Width = 100,
                Height = 50,
                RegionName = $"Region{SelectedImageFile.Regions.Count + 1}"
            });
        }
    }

    /// <summary>Removes the most recently added region of the selected image, if any.</summary>
    private void RemoveRegion()
    {
        var regions = SelectedImageFile?.Regions;
        if (regions != null && regions.Count > 0)
        {
            regions.RemoveAt(regions.Count - 1);
        }
    }

    /// <summary>
    /// Runs recognition for every loaded image and generates its new file name.
    /// </summary>
    /// <remarks>
    /// This is <c>async void</c> because it is invoked through a command; all
    /// exceptions are therefore caught here, as nothing upstream can observe them.
    /// </remarks>
    private async void RecognizeAll()
    {
        try
        {
            var ocrService = new JdOcrService(OcrConfig);

            // Snapshot the list: the user can add or remove images while a
            // request is being awaited.
            foreach (var imageFile in ImageFiles.ToList())
            {
                imageFile.Results = await ocrService.RecognizeImageRegionsAsync(imageFile);
                GenerateNewFileName(imageFile);
            }
        }
        catch (Exception ex)
        {
            MessageBox.Show($"Error recognizing images: {ex.Message}");
        }
    }

    /// <summary>
    /// Fills <see cref="ImageFile.NewFileName"/> by substituting the recognized
    /// texts into <see cref="FileNameTemplate"/> and dropping characters that are
    /// not allowed in file names. Does nothing when there are no results.
    /// </summary>
    private void GenerateNewFileName(ImageFile imageFile)
    {
        if (imageFile.Results == null || !imageFile.Results.Any())
        {
            return;
        }

        var template = FileNameTemplate ?? string.Empty;

        foreach (var result in imageFile.Results)
        {
            template = template.Replace($"{{{result.Region.RegionName}}}", result.Text);
        }

        // Strip characters that are illegal in file names
        var invalidChars = Path.GetInvalidFileNameChars();
        imageFile.NewFileName = new string(template.Where(c => !invalidChars.Contains(c)).ToArray());
    }

    /// <summary>
    /// Renames every image that has a generated name, keeping each file in its
    /// own folder and keeping its extension. Failures are reported per file.
    /// </summary>
    private void RenameFiles()
    {
        foreach (var imageFile in ImageFiles)
        {
            if (string.IsNullOrEmpty(imageFile.NewFileName))
            {
                continue;
            }

            // Use the file's own folder: the images may come from different
            // directories, and a rename must not move them elsewhere.
            var folderPath = Path.GetDirectoryName(imageFile.FilePath);
            if (string.IsNullOrEmpty(folderPath))
            {
                continue;
            }

            var extension = Path.GetExtension(imageFile.FilePath);
            var newPath = Path.Combine(folderPath, $"{imageFile.NewFileName}{extension}");

            try
            {
                File.Move(imageFile.FilePath, newPath);
                imageFile.FilePath = newPath;
            }
            catch (Exception ex)
            {
                MessageBox.Show($"Error renaming file: {ex.Message}");
            }
        }
    }

    /// <summary>
    /// Writes one CSV row per image (file name plus one column per region name)
    /// to <c>OCR_Results.csv</c> on the desktop.
    /// </summary>
    private void ExportToTable()
    {
        var dataTable = new DataTable("OCR Results");

        // Add the columns
        dataTable.Columns.Add("File Name", typeof(string));

        // Collect region names from every image, not only the first one, so a
        // result never refers to a column that does not exist.
        foreach (var imageFile in ImageFiles)
        {
            var regionNames = (imageFile.Regions ?? Enumerable.Empty<OcrRegion>())
                .Select(region => region.RegionName)
                .Concat((imageFile.Results ?? Enumerable.Empty<OcrResult>())
                    .Select(result => result.Region?.RegionName));

            foreach (var regionName in regionNames)
            {
                if (!string.IsNullOrEmpty(regionName) && !dataTable.Columns.Contains(regionName))
                {
                    dataTable.Columns.Add(regionName, typeof(string));
                }
            }
        }

        // Add the rows
        foreach (var imageFile in ImageFiles)
        {
            var row = dataTable.NewRow();
            row["File Name"] = Path.GetFileName(imageFile.FilePath);

            foreach (var result in imageFile.Results ?? Enumerable.Empty<OcrResult>())
            {
                var regionName = result.Region?.RegionName;
                if (!string.IsNullOrEmpty(regionName))
                {
                    row[regionName] = result.Text;
                }
            }

            dataTable.Rows.Add(row);
        }

        // Export to CSV
        var csvPath = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), "OCR_Results.csv");

        try
        {
            // UTF-8 with a byte order mark, so spreadsheet applications detect
            // the encoding of non-ASCII recognized text.
            using (var writer = new StreamWriter(csvPath, false, new System.Text.UTF8Encoding(true)))
            {
                // Header row
                writer.WriteLine(string.Join(",", dataTable.Columns.Cast<DataColumn>().Select(c => QuoteCsvField(c.ColumnName))));

                // Data rows
                foreach (DataRow row in dataTable.Rows)
                {
                    writer.WriteLine(string.Join(",", row.ItemArray.Select(QuoteCsvField)));
                }
            }
        }
        catch (Exception ex)
        {
            // For example, the target file is open in another program.
            MessageBox.Show($"Error exporting table: {ex.Message}");
            return;
        }

        MessageBox.Show($"Export completed. File saved to: {csvPath}");
    }

    /// <summary>Wraps a value in double quotes for CSV, doubling any embedded double quote.</summary>
    private static string QuoteCsvField(object value)
    {
        var text = value?.ToString() ?? string.Empty;
        return "\"" + text.Replace("\"", "\"\"") + "\"";
    }

    /// <summary>Raises <see cref="PropertyChanged"/> for the calling property.</summary>
    protected virtual void OnPropertyChanged([CallerMemberName] string propertyName = null)
    {
        PropertyChanged?.Invoke(this, new PropertyChangedEventArgs(propertyName));
    }
}

// 5. 简单的RelayCommand实现
/// <summary>
/// Minimal <see cref="ICommand"/> that forwards to a parameterless delegate,
/// with an optional predicate deciding whether the command is enabled.
/// </summary>
public class RelayCommand : ICommand
{
    private readonly Action _execute;
    private readonly Func<bool> _canExecute;

    /// <summary>Creates the command.</summary>
    /// <param name="execute">Action run by <see cref="Execute"/>.</param>
    /// <param name="canExecute">Optional predicate; when omitted the command is always enabled.</param>
    /// <exception cref="ArgumentNullException"><paramref name="execute"/> is null.</exception>
    public RelayCommand(Action execute, Func<bool> canExecute = null)
    {
        if (execute == null)
        {
            throw new ArgumentNullException(nameof(execute));
        }

        _execute = execute;
        _canExecute = canExecute;
    }

    /// <summary>Subscriptions are forwarded to <see cref="CommandManager.RequerySuggested"/>.</summary>
    public event EventHandler CanExecuteChanged
    {
        add => CommandManager.RequerySuggested += value;
        remove => CommandManager.RequerySuggested -= value;
    }

    /// <summary>Returns the predicate's answer, or true when no predicate was supplied.</summary>
    public bool CanExecute(object parameter) => _canExecute?.Invoke() ?? true;

    /// <summary>Runs the action; <paramref name="parameter"/> is ignored.</summary>
    public void Execute(object parameter) => _execute.Invoke();
}

界面 XAML 设计示例

下面是主窗口的 XAML 设计示例:

<!--
    Main window: menu bar on top, three-column work area (file list, image
    preview with region overlays, settings tabs) and an action bar at the bottom.
    All bindings resolve against MainViewModel, which is set as the DataContext.
-->
<Window x:Class="OcrBatchProcessor.MainWindow"
        xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
        xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
        xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
        xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
        xmlns:local="clr-namespace:OcrBatchProcessor"
        mc:Ignorable="d"
        Title="批量图片京东OCR处理工具" Height="700" Width="1000">
    <Window.DataContext>
        <local:MainViewModel/>
    </Window.DataContext>
    <Grid>
        <Grid.RowDefinitions>
            <RowDefinition Height="Auto"/>
            <RowDefinition Height="*"/>
            <RowDefinition Height="Auto"/>
        </Grid.RowDefinitions>
        
        <!-- Menu bar -->
        <Menu Grid.Row="0">
            <MenuItem Header="文件">
                <MenuItem Header="添加图片" Command="{Binding AddImageCommand}"/>
                <MenuItem Header="移除选中图片" Command="{Binding RemoveImageCommand}"/>
                <MenuItem Header="退出"/>
            </MenuItem>
            <MenuItem Header="设置">
                <MenuItem Header="OCR配置"/>
                <MenuItem Header="命名规则"/>
            </MenuItem>
            <MenuItem Header="帮助">
                <MenuItem Header="关于"/>
            </MenuItem>
        </Menu>
        
        <!-- Main content area -->
        <Grid Grid.Row="1" Margin="5">
            <Grid.ColumnDefinitions>
                <ColumnDefinition Width="250"/>
                <ColumnDefinition Width="*"/>
                <ColumnDefinition Width="300"/>
            </Grid.ColumnDefinitions>
            
            <!-- Left: file list -->
            <ListBox Grid.Column="0" ItemsSource="{Binding ImageFiles}" 
                     SelectedItem="{Binding SelectedImageFile}" 
                     DisplayMemberPath="FilePath" Margin="0,5,5,0"/>
            
            <!-- Center: preview area -->
            <Grid Grid.Column="1" Margin="5">
                <ScrollViewer>
                    <Canvas x:Name="ImageCanvas" Background="LightGray" 
                            Width="{Binding SelectedImageFile.ImageSource.PixelWidth}"
                            Height="{Binding SelectedImageFile.ImageSource.PixelHeight}">
                        <Image Source="{Binding SelectedImageFile.ImageSource}" Stretch="None"/>
                        
                        <!-- Recognition region overlays -->
                        <ItemsControl ItemsSource="{Binding SelectedImageFile.Regions}">
                            <!--
                                The items must be laid out by a Canvas, and the position
                                must be set on the item container: Canvas.Left / Canvas.Top
                                only take effect on direct children of a Canvas, which are
                                the generated ContentPresenter containers, not the
                                Rectangle inside the data template.
                            -->
                            <ItemsControl.ItemsPanel>
                                <ItemsPanelTemplate>
                                    <Canvas/>
                                </ItemsPanelTemplate>
                            </ItemsControl.ItemsPanel>
                            <ItemsControl.ItemContainerStyle>
                                <Style TargetType="ContentPresenter">
                                    <Setter Property="Canvas.Left" Value="{Binding X}"/>
                                    <Setter Property="Canvas.Top" Value="{Binding Y}"/>
                                </Style>
                            </ItemsControl.ItemContainerStyle>
                            <ItemsControl.ItemTemplate>
                                <DataTemplate>
                                    <Rectangle Stroke="Red" StrokeThickness="2"
                                               Width="{Binding Width}" Height="{Binding Height}">
                                        <Rectangle.Fill>
                                            <SolidColorBrush Color="Red" Opacity="0.2"/>
                                        </Rectangle.Fill>
                                    </Rectangle>
                                </DataTemplate>
                            </ItemsControl.ItemTemplate>
                        </ItemsControl>
                    </Canvas>
                </ScrollViewer>
                
                <!-- Toolbar -->
                <StackPanel Orientation="Horizontal" HorizontalAlignment="Left" VerticalAlignment="Bottom">
                    <Button Content="添加图片识别区域" Command="{Binding AddRegionCommand}" Margin="5"/>
                    <Button Content="删除图片识别区域" Command="{Binding RemoveRegionCommand}" Margin="5"/>
                </StackPanel>
            </Grid>
            
            <!-- Right: settings area -->
            <TabControl Grid.Column="2" Margin="5,0,0,0">
                <TabItem Header="OCR配置">
                    <StackPanel Margin="5">
                        <Label Content="App Key:"/>
                        <TextBox Text="{Binding OcrConfig.AppKey}" Margin="0,0,0,10"/>
                        
                        <Label Content="App Secret:"/>
                        <TextBox Text="{Binding OcrConfig.AppSecret}" Margin="0,0,0,10"/>
                        
                        <Label Content="API URL:"/>
                        <TextBox Text="{Binding OcrConfig.ApiUrl}" Margin="0,0,0,10"/>
                        
                        <Button Content="测试连接" HorizontalAlignment="Left" Margin="0,10,0,0"/>
                    </StackPanel>
                </TabItem>
                
                <TabItem Header="jdocr识别结果">
                    <ListView ItemsSource="{Binding SelectedImageFile.Results}">
                        <ListView.ItemTemplate>
                            <DataTemplate>
                                <StackPanel Orientation="Vertical" Margin="5">
                                    <TextBlock Text="{Binding Region.RegionName}" FontWeight="Bold"/>
                                    <TextBlock Text="{Binding Text}" TextWrapping="Wrap"/>
                                </StackPanel>
                            </DataTemplate>
                        </ListView.ItemTemplate>
                    </ListView>
                </TabItem>
                
                <TabItem Header="命名规则">
                    <StackPanel Margin="5">
                        <Label Content="文件名模板:"/>
                        <TextBox Text="{Binding FileNameTemplate}" Margin="0,0,0,10"/>
                        <TextBlock Text="示例: {Region1}_{Region2}.jpg" Foreground="Gray"/>
                    </StackPanel>
                </TabItem>
            </TabControl>
        </Grid>
        
        <!-- Action bar -->
        <StackPanel Grid.Row="2" Orientation="Horizontal" Margin="5" HorizontalAlignment="Center">
            <Button Content="开始识别" Command="{Binding RecognizeAllCommand}" Margin="5"/>
            <Button Content="批量重命名" Command="{Binding RenameFilesCommand}" Margin="5"/>
            <Button Content="导出表格" Command="{Binding ExportToTableCommand}" Margin="5"/>
        </StackPanel>
    </Grid>
</Window>

总结与优化

  1. 性能优化

    • 实现多线程处理,提高批量识别速度
    • 添加图像预处理功能(灰度化、降噪等),提高 OCR 识别准确率
    • 实现结果缓存机制,避免重复识别相同区域
  2. 用户体验优化

    • 添加拖拽上传功能
    • 实现识别区域的可视化调整(支持鼠标拖动、缩放)
    • 添加进度条显示处理进度
    • 实现撤销 / 重做功能
  3. 功能扩展

    • 支持多种 OCR 引擎切换
    • 添加识别结果编辑功能
    • 支持更复杂的命名规则和表格导出格式
    • 实现模板保存 / 加载功能,方便处理同类图片
  4. 稳定性优化

    • 添加更完善的错误处理和日志记录
    • 实现自动保存功能,防止数据丢失
    • 添加配置文件管理,保存用户设置

通过以上实现方案,你可以开发一个功能完整的图片批量 OCR 处理工具,满足从图片区域文字识别到文件重命名或表格导出的需求。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值