更好的方法是使用 ResultIterator；迭代级别可以选择 tesseract::RIL_BLOCK、tesseract::RIL_PARA、tesseract::RIL_TEXTLINE、tesseract::RIL_WORD 或 tesseract::RIL_SYMBOL。
以下示例来自 https://code.google.com/p/tesseract-ocr/wiki/APIExample ：
// Example: run recognition, then walk the results one word at a time and
// print each word's text, confidence and bounding box.
tesseract::TessBaseAPI api;
// tesseract.Init here
api.SetVariable("save_blob_choices", "T");
// tesseract.SetImage/tesseract.SetRectangle here
api.Recognize(NULL);
tesseract::ResultIterator* ri = api.GetIterator();
tesseract::PageIteratorLevel level = tesseract::RIL_WORD;
if (ri) {
  do {
    // The text buffer is owned by the caller and released with delete[] below.
    // NOTE(review): per the Tesseract docs it can be null when there is nothing
    // at this level, so it is guarded before being handed to printf's %s.
    const char* word = ri->GetUTF8Text(level);
    float conf = ri->Confidence(level);
    // Zero-initialised so the values printed are defined even if BoundingBox
    // does not fill them in.
    int x1 = 0, y1 = 0, x2 = 0, y2 = 0;
    ri->BoundingBox(level, &x1, &y1, &x2, &y2);
    printf("word: '%s'; \tconf: %.2f; BoundingBox: %d,%d,%d,%d;\n",
           word ? word : "", conf, x1, y1, x2, y2);
    delete[] word;  // delete[] on a null pointer is a no-op
  } while (ri->Next(level));
  // The iterator returned by GetIterator() must be deleted by the caller;
  // the original snippet leaked it.
  delete ri;
}