安卓OCR使用(Google ML Kit)

OCR是一个很常用的功能,Google ML Kit提供了OCR能力,用起来也很简单,本文介绍一下使用方法。

1. 相关概念

名词、概念及解释:

- TextBlock:一个段落
- Line:一行文本
- Element:元素,即单词;对汉字来说,只有类似"开头 (分隔符)中间(分隔符) 结尾"这样含有明显分隔符的文本,才会有多个字在同一个Element中,否则每个Element就是单个字
- Symbol:字符,即字母;对汉字来说就是单个字

2. 代码实现

在build.gradle中添加相关依赖:

// To recognize Chinese script
implementation 'com.google.mlkit:text-recognition-chinese:16.0.1'

添加布局文件activity_ocr.xml:

<?xml version="1.0" encoding="utf-8"?>
<!--
    Layout of the OCR screen: a camera preview with a drawing overlay stacked on
    top of it, followed by a button that toggles recognition.
-->
<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    android:orientation="vertical">

    <!--
        The two children are declared wrap_content here, but OCRActivity.adjustSurface()
        overwrites the width/height of both with fixed pixel values at runtime so that
        the overlay and the preview have the same size.
    -->
    <FrameLayout
        android:layout_width="wrap_content"
        android:layout_height="wrap_content">

        <!-- Surface the camera preview is rendered into. -->
        <SurfaceView
            android:id="@+id/camera_preview"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content" />

        <!-- Custom view that draws the recognized text; declared after the preview so it is drawn above it. -->
        <com.example.study.views.OCRDrawView
            android:id="@+id/ocr_area"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content" />
    </FrameLayout>

    <!--
        Start/stop switch; the activity flips its label between "stop" and "start".
        NOTE(review): height is match_parent, so the button only gets whatever vertical
        space is left below the FrameLayout - confirm this is intended on short screens.
    -->
    <Button
        android:id="@+id/ocr_switch"
        android:layout_width="match_parent"
        android:layout_height="match_parent"
        android:layout_gravity="center_horizontal|bottom"
        android:background="#617172"
        android:text="stop" />
</LinearLayout>

绘制文字的OCRDrawView.java:

package com.example.study.views;

import android.content.Context;
import android.graphics.Canvas;
import android.graphics.Color;
import android.graphics.Paint;
import android.graphics.Path;
import android.graphics.Point;
import android.util.AttributeSet;
import android.view.View;

import androidx.annotation.Nullable;

import java.util.ArrayList;
import java.util.List;

/**
 * View that paints recognized text lines: for every registered line it fills the
 * quadrilateral spanned by the line's corner points with white and then writes the
 * recognized text in blue along the bottom edge of that quadrilateral.
 *
 * <p>{@link #add(Point[], String)} and {@link #clear()} synchronize on the same lock as
 * {@link #onDraw(Canvas)}, so shapes may be registered while the view is being drawn.
 */
public class OCRDrawView extends View {

    /** Number of corner points that describe one text quadrilateral. */
    private static final int CORNER_COUNT = 4;

    /** Guards {@link #cornerPointsList}. */
    private final Object lock = new Object();

    protected Paint paint = new Paint();

    protected Path path = new Path();

    private final List<ShapeInfo> cornerPointsList = new ArrayList<>();

    public OCRDrawView(Context context) {
        super(context);
    }

    public OCRDrawView(Context context, @Nullable AttributeSet attrs) {
        super(context, attrs);
    }

    /** Removes every registered shape and schedules a redraw. */
    public void clear() {
        synchronized (lock) {
            cornerPointsList.clear();
        }
        postInvalidate();
    }

    /**
     * Registers one text line to be painted on the next draw pass. The caller is
     * responsible for invalidating the view afterwards.
     *
     * <p>Incomplete input (a {@code null} array, fewer than four points, a {@code null}
     * point or a {@code null} text) is ignored instead of being stored, because the
     * drawing code indexes all four corners and would otherwise crash inside
     * {@link #onDraw(Canvas)}.
     *
     * @param cornerPoints the four corners of the line, indexed 0..3; the text baseline
     *                     runs from index 3 to index 2
     * @param text         the text to write inside the quadrilateral
     */
    public void add(Point[] cornerPoints, String text) {
        if (cornerPoints == null || cornerPoints.length < CORNER_COUNT || text == null) {
            return;
        }
        for (int i = 0; i < CORNER_COUNT; i++) {
            if (cornerPoints[i] == null) {
                return;
            }
        }
        synchronized (lock) {
            cornerPointsList.add(new ShapeInfo(cornerPoints, text));
        }
    }

    @Override
    protected void onDraw(Canvas canvas) {
        super.onDraw(canvas);

        synchronized (lock) {
            for (ShapeInfo shapeInfo : cornerPointsList) {
                drawBackground(shapeInfo, canvas);
                drawText(shapeInfo, canvas);
            }
        }
    }

    /** Writes the shape's text along the edge running from corner 3 to corner 2. */
    private void drawText(ShapeInfo shapeInfo, Canvas canvas) {
        Point[] points = shapeInfo.points;
        // Derive the text size from the quadrilateral: never taller than its height and
        // never wider per character than width / character count.
        double height = calDistance(points[0], points[3]);
        double width = calDistance(points[2], points[3]);
        // For an empty text the division yields +Infinity, so the height wins.
        float textSize = (float) Math.min(height, width / shapeInfo.text.length());
        paint.setColor(Color.BLUE);
        paint.setTextSize(textSize);

        path.reset();
        path.moveTo(points[3].x, points[3].y);
        path.lineTo(points[2].x, points[2].y);

        canvas.drawTextOnPath(shapeInfo.text, path, 0, 0, paint);
    }

    /** Returns the Euclidean distance between two points. */
    private double calDistance(Point start, Point end) {
        return Math.hypot(start.x - end.x, start.y - end.y);
    }

    /** Fills the quadrilateral spanned by the shape's corner points with white. */
    private void drawBackground(ShapeInfo shapeInfo, Canvas canvas) {
        Point[] shape = shapeInfo.points;
        path.reset();
        // Start at the last corner so that the lineTo calls below trace the full outline.
        path.moveTo(shape[CORNER_COUNT - 1].x, shape[CORNER_COUNT - 1].y);
        for (Point corner : shape) {
            path.lineTo(corner.x, corner.y);
        }
        path.close();

        paint.setColor(Color.WHITE);
        canvas.drawPath(path, paint);
    }

    /** One text line to paint: its corner points and its text. */
    static class ShapeInfo {
        Point[] points;
        String text;

        public ShapeInfo(Point[] shape, String text) {
            this.points = shape;
            this.text = text;
        }
    }
}

Activity类OCRActivity.java:

package com.example.study.activities;

import android.Manifest;
import android.content.pm.PackageManager;
import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.graphics.ImageFormat;
import android.graphics.Matrix;
import android.graphics.Point;
import android.graphics.Rect;
import android.graphics.YuvImage;
import android.hardware.Camera;
import android.os.Bundle;
import android.util.Log;
import android.view.SurfaceHolder;
import android.view.SurfaceView;
import android.view.ViewGroup;
import android.widget.Button;
import android.widget.FrameLayout;
import android.widget.Toast;

import androidx.activity.ComponentActivity;
import androidx.annotation.NonNull;
import androidx.annotation.Nullable;

import com.example.study.R;
import com.example.study.views.OCRDrawView;
import com.google.mlkit.vision.text.Text;
import com.google.mlkit.vision.text.TextRecognition;
import com.google.mlkit.vision.text.TextRecognizer;
import com.google.mlkit.vision.text.chinese.ChineseTextRecognizerOptions;

import java.io.ByteArrayOutputStream;

/**
 * Activity that shows a live camera preview, runs the ML Kit Chinese text recognizer on
 * the preview frames and draws every recognized line on an {@link OCRDrawView} overlay.
 *
 * <p>Only one frame is recognized at a time: frames that arrive while a recognition is
 * still running, or while recognition is switched off with the button, are dropped.
 */
public class OCRActivity extends ComponentActivity implements Camera.PreviewCallback, SurfaceHolder.Callback {
    private static final String TAG = "CameraDemoActivity";
    private static final int REQUEST_CAMERA = 1000;
    /** Height of the portrait preview area in pixels; also the width of the unrotated camera frame. */
    private static final int HEIGHT = 1920;
    /** Width of the portrait preview area in pixels; also the height of the unrotated camera frame. */
    private static final int WIDTH = 1080;
    /** Clockwise rotation, in degrees, applied to the preview and to the recognition result. */
    private static final int ORIENTATION = 90;
    /** Lines recognized with a confidence below this value are not drawn. */
    private static final float MIN_CONFIDENCE = 0.3f;
    private static final int JPEG_QUALITY = 100;

    private SurfaceView preview;
    private OCRDrawView ocrArea;
    private Button ocrSwitch;
    /** The open camera, or {@code null} while no camera is held. */
    private Camera camera;

    private TextRecognizer recognizer;
    /** Maps corner points from the unrotated camera frame to the rotated overlay. */
    private Matrix matrix;

    /** {@code true} while a frame is being processed by the recognizer. */
    private boolean isRecognizing = false;
    /** {@code true} while recognition is switched off by the user. */
    private boolean stopRecognizer = false;

    @Override
    protected void onCreate(@Nullable Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        this.setContentView(R.layout.activity_ocr);
        initView();
        initVar();

        // Ask for the camera permission first; the camera is only touched once it is granted.
        if (checkSelfPermission(Manifest.permission.CAMERA) != PackageManager.PERMISSION_GRANTED) {
            requestPermissions(new String[]{Manifest.permission.CAMERA}, REQUEST_CAMERA);
        } else {
            preview.getHolder().addCallback(this);
        }
    }

    /** Creates the text recognizer and the matrix that rotates result coordinates. */
    private void initVar() {
        recognizer = TextRecognition.getClient(new ChineseTextRecognizerOptions.Builder().build());
        matrix = new Matrix();
        matrix.setRotate(ORIENTATION);
        // The corner points refer to the unrotated frame, so HEIGHT and WIDTH are swapped
        // here: move the frame centre to the origin, rotate, then move the origin to the
        // centre of the rotated area.
        matrix.preTranslate(-(HEIGHT / 2f), -(WIDTH / 2f));
        matrix.postTranslate(WIDTH / 2f, HEIGHT / 2f);
    }

    /** Looks up the views, wires the start/stop button and sizes preview and overlay. */
    private void initView() {
        preview = findViewById(R.id.camera_preview);
        ocrArea = findViewById(R.id.ocr_area);
        ocrSwitch = findViewById(R.id.ocr_switch);
        ocrSwitch.setOnClickListener(view -> {
            stopRecognizer = !stopRecognizer;
            ocrSwitch.setText(stopRecognizer ? "start" : "stop");
            if (camera == null) {
                return;
            }
            if (stopRecognizer) {
                // Freeze the preview so the last result stays aligned with the picture.
                camera.stopPreview();
            } else {
                ocrArea.clear();
                camera.startPreview();
            }
        });
        adjustSurface(preview, ocrArea);
    }

    /** Gives the preview surface and the overlay the same fixed pixel size. */
    private void adjustSurface(SurfaceView cameraPreview, OCRDrawView ocrArea) {
        FrameLayout.LayoutParams cameraPreviewParams = (FrameLayout.LayoutParams) cameraPreview.getLayoutParams();
        cameraPreviewParams.width = WIDTH;
        cameraPreviewParams.height = HEIGHT;

        ViewGroup.LayoutParams ocrAreaParams = ocrArea.getLayoutParams();
        ocrAreaParams.width = WIDTH;
        ocrAreaParams.height = HEIGHT;
    }

    @Override
    public void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions, @NonNull int[] grantResults) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults);
        if (requestCode != REQUEST_CAMERA || grantResults.length == 0) {
            return;
        }
        if (grantResults[0] != PackageManager.PERMISSION_GRANTED) {
            finish();
            return;
        }
        SurfaceHolder holder = preview.getHolder();
        holder.addCallback(this);
        // The callback is registered late on this path, so the camera is set up by hand
        // instead of waiting for a surfaceCreated() notification.
        surfaceCreated(holder);
        if (camera == null) {
            // Opening the camera failed; surfaceCreated() has already logged the reason.
            return;
        }
        camera.setPreviewCallback(this);
        camera.startPreview();
    }

    /**
     * Receives one preview frame and hands it to the recognizer unless a recognition is
     * already running or recognition is switched off.
     */
    @Override
    public void onPreviewFrame(byte[] data, Camera camera) {
        if (isRecognizing || stopRecognizer) {
            return;
        }
        Bitmap bitmap = convertToBitmap(camera, data);
        if (bitmap == null) {
            // Frame could not be decoded; skip it and wait for the next one.
            return;
        }
        isRecognizing = true;
        recognizer.process(bitmap, ORIENTATION)
                .addOnSuccessListener(this::parseOCRResult)
                .addOnFailureListener(exception -> {
                    Toast.makeText(this, "Failure", Toast.LENGTH_SHORT).show();
                    isRecognizing = false;
                })
                .addOnCompleteListener(task -> {
                    isRecognizing = false;
                })
                .addOnCanceledListener(() -> {
                    Toast.makeText(this, "Canceled", Toast.LENGTH_SHORT).show();
                    isRecognizing = false;
                });
    }

    /**
     * Replaces the overlay content with the lines of a recognition result. A result
     * without any text leaves the overlay as it is.
     */
    private void parseOCRResult(Text text) {
        // Everything that was recognized in the frame.
        String textContent = text.getText();
        if (textContent == null || textContent.trim().isEmpty()) {
            return;
        }
        ocrArea.clear();
        // Block: a paragraph.
        for (Text.TextBlock textBlock : text.getTextBlocks()) {
            // Line: one line of text; this is the granularity that gets drawn.
            for (Text.Line line : textBlock.getLines()) {
                drawResult(line);
                // The loops below only illustrate the finer levels of the result tree.
                // Element: a word; Chinese text needs strong separators to be split into
                // several multi-character elements.
                for (Text.Element element : line.getElements()) {
                    // Symbol: a single character / letter.
                    for (Text.Symbol symbol : element.getSymbols()) {
                        symbol.getText();
                    }
                }
            }
        }
        // One redraw for the whole result instead of one per line.
        ocrArea.postInvalidate();
    }

    /**
     * Registers one recognized line on the overlay, rotating its corner points into the
     * overlay's coordinate system. Lines without corner points or with a confidence below
     * {@link #MIN_CONFIDENCE} are skipped.
     *
     * <p>A line also exposes {@code getAngle()}, {@code getBoundingBox()} and
     * {@code getRecognizedLanguage()}; they are not needed for drawing.
     */
    private void drawResult(Text.Line line) {
        // Four corners, clockwise starting at the top-left one, in unrotated frame
        // coordinates; may be absent.
        Point[] cornerPoints = line.getCornerPoints();
        if (cornerPoints == null || line.getConfidence() < MIN_CONFIDENCE) {
            return;
        }
        float[] mapped = new float[2];
        for (Point cornerPoint : cornerPoints) {
            mapped[0] = cornerPoint.x;
            mapped[1] = cornerPoint.y;
            matrix.mapPoints(mapped);
            cornerPoint.x = (int) mapped[0];
            cornerPoint.y = (int) mapped[1];
        }
        ocrArea.add(cornerPoints, line.getText());
    }

    /**
     * Converts one NV21 preview frame into a bitmap by way of an in-memory JPEG.
     *
     * @param camera the camera that delivered the frame; its preview size gives the frame dimensions
     * @param data   the raw frame bytes
     * @return the decoded bitmap, or {@code null} if the frame is missing or cannot be converted
     */
    private Bitmap convertToBitmap(Camera camera, byte[] data) {
        if (data == null) {
            return null;
        }
        Camera.Size frameSize = camera.getParameters().getPreviewSize();
        YuvImage yuv = new YuvImage(data, ImageFormat.NV21, frameSize.width, frameSize.height, null);
        ByteArrayOutputStream stream = new ByteArrayOutputStream();
        Rect wholeFrame = new Rect(0, 0, frameSize.width, frameSize.height);
        if (!yuv.compressToJpeg(wholeFrame, JPEG_QUALITY, stream)) {
            return null;
        }
        // toByteArray() copies the buffer, so take the copy once.
        byte[] jpeg = stream.toByteArray();
        return BitmapFactory.decodeByteArray(jpeg, 0, jpeg.length);
    }

    @Override
    protected void onDestroy() {
        super.onDestroy();
        if (recognizer != null) {
            recognizer.close();
        }
    }

    /** Opens the back camera and attaches it to the preview surface. */
    @Override
    public void surfaceCreated(@NonNull SurfaceHolder holder) {
        // This method is also invoked by hand after the permission grant; never hold two cameras.
        releaseCamera();
        try {
            camera = Camera.open();
            if (camera == null) {
                Log.e(TAG, "No back-facing camera available");
                return;
            }
            Camera.Parameters parameters = camera.getParameters();
            applyPreviewSettings(parameters);
            camera.setPreviewDisplay(holder);
            camera.setDisplayOrientation(ORIENTATION);
            camera.setParameters(parameters);
        } catch (Exception exception) {
            // Best effort: keep going with whatever state the camera is in, but record
            // the full exception (its message alone may be null).
            Log.e(TAG, "Failed to set up the camera", exception);
        }
    }

    /**
     * Configures the preview stream the recognizer consumes: an unrotated
     * {@code HEIGHT x WIDTH} NV21 frame with continuous auto focus. Settings the device
     * does not report as supported are skipped.
     */
    private void applyPreviewSettings(Camera.Parameters parameters) {
        // The display is rotated by 90 degrees, so the frame itself is HEIGHT wide and WIDTH high.
        boolean sizeSupported = false;
        for (Camera.Size size : parameters.getSupportedPreviewSizes()) {
            if (size.width == HEIGHT && size.height == WIDTH) {
                sizeSupported = true;
                break;
            }
        }
        if (sizeSupported) {
            parameters.setPreviewSize(HEIGHT, WIDTH);
        } else {
            Log.w(TAG, "Preview size " + HEIGHT + "x" + WIDTH + " is not supported; the overlay may be misaligned");
        }
        parameters.setPreviewFormat(ImageFormat.NV21);
        if (parameters.getSupportedFocusModes().contains(Camera.Parameters.FOCUS_MODE_CONTINUOUS_PICTURE)) {
            parameters.setFocusMode(Camera.Parameters.FOCUS_MODE_CONTINUOUS_PICTURE);
        }
    }

    /** Restarts the preview with this activity as frame callback and re-enables recognition. */
    @Override
    public void surfaceChanged(@NonNull SurfaceHolder holder, int format, int width, int height) {
        if (camera != null) {
            camera.stopPreview();
            camera.setPreviewCallback(null);
            camera.startPreview();
            camera.setPreviewCallback(this);
            ocrArea.clear();
            // Force the "stopped" state and let the click handler flip it, so the button
            // label and the preview both end up in the running state.
            stopRecognizer = true;
            ocrSwitch.performClick();
        }
    }

    @Override
    public void surfaceDestroyed(@NonNull SurfaceHolder holder) {
        releaseCamera();
    }

    /** Stops the preview and releases the camera, if one is held. */
    private void releaseCamera() {
        if (camera == null) {
            return;
        }
        camera.stopPreview();
        camera.setPreviewCallback(null);
        camera.release();
        // Drop the reference so nothing touches a released camera afterwards.
        camera = null;
    }
}

参考文章

  1. 文字识别 v2
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值