C++实例 调用Tesseract OCR的API

C++实例 调用Tesseract OCR的API

    • [1. 前言](#1. 前言)
    • [2. 模式](#2. 模式)
    • [3. 调用方式 C++ Examples【转自官网】](#3. 调用方式C++ Examples**【转自官网】)
      • [`3.1 Basic_example`](#3.1 Basic_example)
      • [`3.2 SetRectangle_example`](#3.2 SetRectangle_example)
      • [`3.3 GetComponentImages_example`](#3.3 GetComponentImages_example)
      • [`3.4 ResultIterator_example`](#3.4 ResultIterator_example)
      • [`3.5 OSD_example`](#3.5 OSD_example)
      • [`3.6 LSTM_Choices_example`](#3.6 LSTM_Choices_example)
      • [`3.7 OpenCV_example`](#3.7 OpenCV_example)
      • [`3.8 UserPatterns_example`](#3.8 UserPatterns_example)

1. 前言

Tesseract OCR支持不同调用方式(详情请看具体实例),同一种调用方式也可以设置不同模式。

调用方法或模式不同,对OCR识别结果的精度有一定影响。模式设置不同,输出的结果格式也不一致。

实际项目中,需要根据需求比较各方法的优劣从而选择最合适的。

2. 模式

cpp 复制代码
枚举类型定义
/// Granularity at which a page/result iterator walks the recognized layout,
/// listed from the coarsest unit (block) down to the finest (symbol).
enum PageIteratorLevel {
  RIL_BLOCK = 0,     ///< Block of text/image/separator line.
  RIL_PARA = 1,      ///< Paragraph within a block.
  RIL_TEXTLINE = 2,  ///< Line within a paragraph.
  RIL_WORD = 3,      ///< Word within a textline.
  RIL_SYMBOL = 4     ///< Symbol/character within a word.
};

RIL_BLOCK:把页面分割成不同区域(块),按区域识别文字,OCR结果是每个区域的字符串

RIL_PARA:按段落识别文字,OCR结果是每个段落的字符串

RIL_TEXTLINE:按行识别文字,OCR结果是一行一行的字符串

RIL_WORD: 按单词识别文字,OCR结果是一个一个的单词

RIL_SYMBOL:按字符识别文字,OCR结果是一个一个的字符

3. 调用方式 C++ Examples【转自官网】

C++API实例:https://tesseract-ocr.github.io/tessdoc/Examples_C++.html

API实例:https://tesseract-ocr.github.io/tessdoc/#api-examples

注意

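如果C++的实例代码编译不通过(例如直接访问 boxes->n、box->x 这类 Leptonica 结构体成员时报错),可能需要添加以下头文件。较新版本的 Leptonica(1.83 及以上)把这些结构体的定义移到了该头文件中,allheaders.h 不再包含它们。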

cpp 复制代码
#include <leptonica/pix_internal.h>

3.1 Basic_example

cpp 复制代码
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>

// Basic example: run OCR over a whole image and print the recognized text.
//
// Returns 0 on success and 1 when Tesseract cannot be initialized, the input
// image cannot be read, or no text buffer is produced.
int main()
{
    tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
    // Initialize tesseract-ocr with English, without specifying tessdata path.
    // Init() reports failure with a non-zero return value.
    if (api->Init(nullptr, "eng")) {
        fprintf(stderr, "Could not initialize tesseract.\n");
        delete api;
        return 1;
    }

    // Open input image with leptonica library; pixRead() yields a null
    // pointer when the file is missing or cannot be decoded.
    Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
    if (image == nullptr) {
        fprintf(stderr, "Could not read input image.\n");
        api->End();
        delete api;
        return 1;
    }
    api->SetImage(image);

    // Get OCR result. The buffer is owned by the caller and is released
    // with delete[] below.
    int status = 0;
    char *outText = api->GetUTF8Text();
    if (outText != nullptr) {
        printf("OCR output:\n%s", outText);
    } else {
        fprintf(stderr, "OCR did not produce any text.\n");
        status = 1;
    }

    // Destroy used object and release memory
    delete [] outText;
    api->End();
    delete api;
    pixDestroy(&image);

    return status;
}

3.2 SetRectangle_example

如果只想识别特定区域的文字,可以用这个方法。需提前设定指定区域的坐标。

cpp 复制代码
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>

// SetRectangle example: run OCR on a fixed sub-rectangle of the image and
// print the recognized text.
//
// Returns 0 on success and 1 when Tesseract cannot be initialized, the input
// image cannot be read, or no text buffer is produced.
int main()
{
    tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
    // Initialize tesseract-ocr with English, without specifying tessdata path.
    // Init() reports failure with a non-zero return value.
    if (api->Init(nullptr, "eng")) {
        fprintf(stderr, "Could not initialize tesseract.\n");
        delete api;
        return 1;
    }

    // Open input image with leptonica library; pixRead() yields a null
    // pointer when the file is missing or cannot be decoded.
    Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
    if (image == nullptr) {
        fprintf(stderr, "Could not read input image.\n");
        api->End();
        delete api;
        return 1;
    }
    api->SetImage(image);

    // Restrict recognition to a sub-rectangle of the image.
    // SetRectangle(left, top, width, height); must be called after SetImage().
    api->SetRectangle(30, 86, 590, 100);

    // Get OCR result. The buffer is owned by the caller and is released
    // with delete[] below.
    int status = 0;
    char *outText = api->GetUTF8Text();
    if (outText != nullptr) {
        printf("OCR output:\n%s", outText);
    } else {
        fprintf(stderr, "OCR did not produce any text.\n");
        status = 1;
    }

    // Destroy used object and release memory
    delete [] outText;
    api->End();
    delete api;
    pixDestroy(&image);

    return status;
}

3.3 GetComponentImages_example

以Box的形式返回OCR结果

cpp 复制代码
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>

int main()
{
  char *outText;
  tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
  // Initialize tesseract-ocr with English, without specifying tessdata path
  if (api->Init(NULL, "eng")) {
      fprintf(stderr, "Could not initialize tesseract.\n");
      exit(1);
  }
  Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
  api->SetImage(image);
  Boxa* boxes = api->GetComponentImages(tesseract::RIL_TEXTLINE, true, NULL, NULL);
  printf("Found %d textline image components.\n", boxes->n);
  for (int i = 0; i < boxes->n; i++) {
    BOX* box = boxaGetBox(boxes, i, L_CLONE);
    api->SetRectangle(box->x, box->y, box->w, box->h);
    char* ocrResult = api->GetUTF8Text();
    int conf = api->MeanTextConf();
    fprintf(stdout, "Box[%d]: x=%d, y=%d, w=%d, h=%d, confidence: %d, text: %s",
                    i, box->x, box->y, box->w, box->h, conf, ocrResult);
    boxDestroy(&box);
  }
  // Destroy used object and release memory
  api->End();
  delete api;
  delete [] outText;
  pixDestroy(&image);

  return 0;
}

3.4 ResultIterator_example

以迭代器的形式返回OCR结果

cpp 复制代码
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>

// ResultIterator example: recognize an image, then walk the result word by
// word, printing each word with its confidence and bounding box.
//
// Returns 0 on success and 1 when Tesseract cannot be initialized, the input
// image cannot be read, or recognition fails.
int main()
{
  tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
  // Initialize tesseract-ocr with English, without specifying tessdata path.
  // Init() reports failure with a non-zero return value.
  if (api->Init(nullptr, "eng")) {
      fprintf(stderr, "Could not initialize tesseract.\n");
      delete api;
      return 1;
  }
  Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
  if (image == nullptr) {
      fprintf(stderr, "Could not read input image.\n");
      api->End();
      delete api;
      return 1;
  }
  api->SetImage(image);

  int status = 0;
  // Recognize() returns 0 on success.
  if (api->Recognize(nullptr) != 0) {
    fprintf(stderr, "Recognition failed.\n");
    status = 1;
  } else {
    tesseract::ResultIterator* ri = api->GetIterator();
    tesseract::PageIteratorLevel level = tesseract::RIL_WORD;
    if (ri != nullptr) {
      do {
        // The text buffer is owned by the caller; it may be null for an
        // empty element, which must not be handed to "%s".
        const char* word = ri->GetUTF8Text(level);
        float conf = ri->Confidence(level);
        int x1, y1, x2, y2;
        ri->BoundingBox(level, &x1, &y1, &x2, &y2);
        printf("word: '%s';  \tconf: %.2f; BoundingBox: %d,%d,%d,%d;\n",
                 word != nullptr ? word : "", conf, x1, y1, x2, y2);
        delete[] word;
      } while (ri->Next(level));
      // The iterator is caller-owned and must be released before End()
      // tears down the recognition results it refers to.
      delete ri;
    }
  }
  // Destroy used object and release memory
  api->End();
  delete api;
  pixDestroy(&image);
  return status;
}

3.5 OSD_example

如果需要判断文字的方向,可以参考这个方法

cpp 复制代码
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>

// OSD example: detect the page orientation and script of an image using the
// "osd" traineddata, and print the result.
//
// Returns 0 on success and 1 when the image cannot be read, Tesseract cannot
// be initialized, or detection fails.
int main()
{
    const char* inputfile = "/tesseract/testing/devatest-rotated-270.png";
    PIX *image = pixRead(inputfile);
    if (image == nullptr) {
        fprintf(stderr, "Could not read input image.\n");
        return 1;
    }

    tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
    // Init() reports failure with a non-zero return value.
    if (api->Init(nullptr, "osd")) {
        fprintf(stderr, "Could not initialize tesseract.\n");
        delete api;
        pixDestroy(&image);
        return 1;
    }
    api->SetPageSegMode(tesseract::PSM_OSD_ONLY);
    api->SetImage(image);

    int orient_deg = 0;
    float orient_conf = 0.0f;
    const char* script_name = nullptr;
    float script_conf = 0.0f;
    int status = 0;
    // The outputs are only meaningful when detection succeeds, so the result
    // is printed only in that case.
    if (api->DetectOrientationScript(&orient_deg, &orient_conf, &script_name, &script_conf)) {
        printf("************\n Orientation in degrees: %d\n Orientation confidence: %.2f\n"
        " Script: %s\n Script confidence: %.2f\n",
        orient_deg, orient_conf,
        script_name != nullptr ? script_name : "", script_conf);
    } else {
        fprintf(stderr, "Could not detect orientation and script.\n");
        status = 1;
    }

    // Destroy used object and release memory
    api->End();
    delete api;
    pixDestroy(&image);

    return status;
}

3.6 LSTM_Choices_example

cpp 复制代码
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>
// LSTM choices example: recognize an image with lstm_choice_mode=2 and, for
// every word, print the alternative symbol choices of each timestep together
// with their confidence and the word-level bounding box.
//
// Returns 0 on success and 1 when Tesseract cannot be initialized, the input
// image cannot be read, or recognition fails.
int main()
{
    tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
    // Initialize tesseract-ocr with English, without specifying tessdata path.
    // Init() reports failure with a non-zero return value.
    if (api->Init(nullptr, "eng")) {
        fprintf(stderr, "Could not initialize tesseract.\n");
        delete api;
        return 1;
    }
    // Open input image with leptonica library
    Pix *image = pixRead("choices.png");
    if (image == nullptr) {
        fprintf(stderr, "Could not read input image.\n");
        api->End();
        delete api;
        return 1;
    }
    api->SetImage(image);
    // Set lstm_choice_mode to alternative symbol choices per character, bbox is at word level.
    api->SetVariable("lstm_choice_mode", "2");

    int status = 0;
    // Recognize() returns 0 on success.
    if (api->Recognize(nullptr) != 0) {
        fprintf(stderr, "Recognition failed.\n");
        status = 1;
    } else {
        tesseract::PageIteratorLevel level = tesseract::RIL_WORD;
        tesseract::ResultIterator* res_it = api->GetIterator();
        // Get confidence level for alternative symbol choices. Code is based on
        // https://github.com/tesseract-ocr/tesseract/blob/main/src/api/hocrrenderer.cpp#L325-L344
        if (res_it != nullptr) {
            do {
                int x1, y1, x2, y2;
                res_it->BoundingBox(level, &x1, &y1, &x2, &y2);
                // The choices pointer is not owned here and can be null when
                // the current word has no recorded choices; skip such words.
                const auto *choiceMap = res_it->GetBestLSTMSymbolChoices();
                if (choiceMap == nullptr) {
                    continue;  // still evaluates res_it->Next(level)
                }
                int wcnt = 0;  // index of the timestep within the current word
                for (const auto &timestep : *choiceMap) {
                    for (const auto &choice : timestep) {
                        // Truncated to a whole percentage on purpose, as in
                        // the original example.
                        const float conf = static_cast<float>(static_cast<int>(choice.second * 100));
                        const char *word = choice.first;
                        printf("%d  symbol: '%s';  \tconf: %.2f; BoundingBox: %d,%d,%d,%d;\n",
                               wcnt, word, conf, x1, y1, x2, y2);
                    }
                    wcnt++;
                    printf("\n");
                }
            } while (res_it->Next(level));
            // The iterator is caller-owned; release it before End().
            delete res_it;
        }
    }
    // Destroy used object and release memory
    api->End();
    delete api;
    pixDestroy(&image);
    return status;
}

3.7 OpenCV_example

/*

Windows compile example:

SET TESS_INSTALATION=C:/win64

SET OPENCV_INSTALATION=C:/opencv/build

cl OpenCV_example.cc -I %TESS_INSTALATION%/include -I %OPENCV_INSTALATION%/include /link /LIBPATH:%TESS_INSTALATION%/lib /LIBPATH:%OPENCV_INSTALATION%/x64/vc14/lib tesseract51.lib leptonica-1.83.0.lib opencv_world460.lib /machine:x64

*/

#include <iostream>
#include <string>

#include <leptonica/allheaders.h>
#include <opencv2/opencv.hpp>
#include <tesseract/baseapi.h>

// OpenCV example: load an image with OpenCV, hand its pixel buffer to
// Tesseract and print the recognized text.
//
// Usage: <program> <image-path>
// Returns 0 on success and 1 on a missing argument, an unreadable image or a
// Tesseract initialization failure.
int main(int argc, char *argv[]) {
    // argv[1] is only valid when an argument was actually supplied.
    if (argc < 2) {
        std::cerr << "Usage: OpenCV_example <image-path>\n";
        return 1;
    }
    const std::string imPath = argv[1];

    // imread() signals failure by returning an empty matrix.
    cv::Mat im = cv::imread(imPath, cv::IMREAD_COLOR);
    if (im.empty()) {
        std::cerr << "Could not read input image: " << imPath << "\n";
        return 1;
    }

    tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
    // Init() reports failure with a non-zero return value.
    if (api->Init(nullptr, "eng", tesseract::OEM_LSTM_ONLY)) {
        std::cerr << "Could not initialize tesseract.\n";
        delete api;
        return 1;
    }
    api->SetPageSegMode(tesseract::PSM_AUTO);

    // 3 bytes per pixel for an IMREAD_COLOR image; im.step is the row stride
    // in bytes. `im` must stay alive until recognition has finished because
    // Tesseract reads from its buffer.
    api->SetImage(im.data, im.cols, im.rows, 3, im.step);

    // The returned buffer is caller-owned: copy it into a std::string and
    // free it, guarding against a null result.
    char *rawText = api->GetUTF8Text();
    const std::string outText = (rawText != nullptr) ? rawText : "";
    delete [] rawText;
    std::cout << outText;

    api->End();
    delete api;
    return 0;
}

3.8 UserPatterns_example

cpp 复制代码
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>

// UserPatterns example: initialize Tesseract with an extra config file (which
// is expected to point at a user-patterns file), run OCR on an image and
// print the recognized text.
//
// Returns 0 on success and 1 when Tesseract cannot be initialized, the input
// image cannot be read, or no text buffer is produced.
int main()
{
    // Init() takes the config list as char**, so the path lives in a mutable
    // array; binding a string literal to char* is not valid C++.
    char config_path[] = "path/to/my.patterns.config";
    char *configs[] = {config_path};
    int configs_size = 1;
    tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
    // Init() reports failure with a non-zero return value.
    if (api->Init(nullptr, "eng", tesseract::OEM_LSTM_ONLY, configs, configs_size, nullptr, nullptr, false)) {
      fprintf(stderr, "Could not initialize tesseract.\n");
      delete api;
      return 1;
    }
    Pix *image = pixRead("Arial.png");
    if (image == nullptr) {
      fprintf(stderr, "Could not read input image.\n");
      api->End();
      delete api;
      return 1;
    }
    api->SetImage(image);

    int status = 0;
    char *outText = api->GetUTF8Text();
    if (outText != nullptr) {
      // Never use recognized text as the format string: a '%' in the OCR
      // output would be interpreted as a conversion specifier.
      printf("%s", outText);
    } else {
      fprintf(stderr, "OCR did not produce any text.\n");
      status = 1;
    }

    // Destroy used object and release memory
    delete [] outText;
    api->End();
    delete api;
    pixDestroy(&image);
    return status;
}
相关推荐
羑悻的小杀马特1 小时前
【AIGC篇】畅谈游戏开发设计中AIGC所发挥的不可或缺的作用
c++·人工智能·aigc·游戏开发
闻缺陷则喜何志丹1 小时前
【C++动态规划】1105. 填充书架|2104
c++·算法·动态规划·力扣·高度·最小·书架
初学者丶一起加油2 小时前
C语言基础:指针(数组指针与指针数组)
linux·c语言·开发语言·数据结构·c++·算法·visual studio
CodeClimb3 小时前
【华为OD-E卷-租车骑绿道 100分(python、java、c++、js、c)】
java·javascript·c++·python·华为od
易码智能3 小时前
【RealTimeCallBack】- KRTS C++示例精讲(4)
c++·定时器·kithara·windows 实时套件·krts
小王爱吃月亮糖3 小时前
QT-QVariant类应用
开发语言·c++·笔记·qt·visual studio
计科土狗3 小时前
基于c语言的union、字符串、格式化输入输出
c++
闻缺陷则喜何志丹3 小时前
【C++动态规划】1458. 两个子序列的最大点积|1823
c++·算法·动态规划·力扣·最大·子序列·点积
半盏茶香3 小时前
C语言勘破之路-最终篇 —— 预处理(上)
c语言·开发语言·数据结构·c++·算法
没事就去码4 小时前
RBTree(红黑树)
数据结构·c++