I am using tesseract for OCR in my project. I used 72 dpi images, but the output it gives me is far from accurate. I read somewhere that input above 200 dpi works well. But what is the standard image configuration (i.e. size, dpi, file format, etc.) for the tesseract library?
4 Answers
This is the second part of my answer, split off because of the character limit.
Here is the Image .mm implementation file:
/*
* Image.cpp
* ImageProcessing
*
*
*
*/
#include "Image.h"
#include <stack>
@implementation ImageWrapper
@synthesize image;
@synthesize ownsImage;
+ (ImageWrapper *) imageWithCPPImage:(Image *) theImage;
{
ImageWrapper *wrapper = [[ImageWrapper alloc] init];
wrapper.image=theImage;
wrapper.ownsImage=true;
return [wrapper autorelease];
}
+ (ImageWrapper *) imageWithCPPImage:(Image *) theImage ownsImage:(bool) ownsTheImage;
{
ImageWrapper *wrapper = [[ImageWrapper alloc] init];
wrapper.image=theImage;
wrapper.ownsImage=ownsTheImage;
return [wrapper autorelease];
}
- (void) dealloc
{
// delete the image that we have been holding onto
if(ownsImage) delete image;
[super dealloc];
}
@end
void Image::initYptrs() {
m_yptrs=(uint8_t **) malloc(sizeof(uint8_t *)*m_height);
for(int i=0; i<m_height; i++) {
m_yptrs[i]=m_imageData+i*m_width;
}
}
Image::Image(ImageWrapper *other, int x1, int y1, int x2, int y2) {
m_width=x2-x1;
m_height=y2-y1;
m_imageData=(uint8_t *) malloc(m_width*m_height);
initYptrs();
Image *otherImage=other.image;
for(int y=y1; y<y2; y++) {
for(int x=x1; x<x2; x++) {
(*this)[y-y1][x-x1]=(*otherImage)[y][x];
}
}
m_ownsData=true;
}
Image::Image(int width, int height) {
m_imageData=(uint8_t *) malloc(width*height);
m_width=width;
m_height=height;
m_ownsData=true;
initYptrs();
}
// create an image from data
Image::Image(uint8_t *imageData, int width, int height, bool ownsData) {
m_imageData=imageData;
m_width=width;
m_height=height;
m_ownsData=ownsData;
initYptrs();
}
Image::Image(UIImage *srcImage, int width, int height, CGInterpolationQuality interpolation, bool imageIsRotatedBy90degrees) {
if(imageIsRotatedBy90degrees) {
int tmp=width;
width=height;
height=tmp;
}
m_width=width;
m_height=height;
// get hold of the image bytes
m_imageData=(uint8_t *) malloc(m_width*m_height);
CGColorSpaceRef colorSpace=CGColorSpaceCreateDeviceGray();
CGContextRef context=CGBitmapContextCreate(m_imageData, m_width, m_height, 8, m_width, colorSpace, kCGImageAlphaNone);
CGContextSetInterpolationQuality(context, interpolation);
CGContextSetShouldAntialias(context, NO);
CGContextDrawImage(context, CGRectMake(0,0, m_width, m_height), [srcImage CGImage]);
CGContextRelease(context);
CGColorSpaceRelease(colorSpace);
if(imageIsRotatedBy90degrees) {
uint8_t *tmpImage=(uint8_t *) malloc(m_width*m_height);
for(int y=0; y<m_height; y++) {
for(int x=0; x<m_width; x++) {
tmpImage[x*m_height+y]=m_imageData[(m_height-y-1)*m_width+x];
}
}
int tmp=m_width;
m_width=m_height;
m_height=tmp;
free(m_imageData);
m_imageData=tmpImage;
}
initYptrs();
}
void Image::normalise() {
int min=INT_MAX;
int max=0;
for(int i=0; i<m_width*m_height; i++) {
if(m_imageData[i]>max) max=m_imageData[i];
if(m_imageData[i]<min) min=m_imageData[i];
}
if(max==min) return; // avoid a divide by zero on a completely flat image
for(int i=0; i<m_width*m_height; i++) {
m_imageData[i]=255*(m_imageData[i]-min)/(max-min);
}
}
// copy a section of another image
ImageWrapper *Image::createImage(ImageWrapper *other, int x1, int y1, int x2, int y2)
{
return [ImageWrapper imageWithCPPImage:new Image(other, x1, y1, x2, y2)];
}
// create an empty image of the required width and height
ImageWrapper *Image::createImage(int width, int height) {
return [ImageWrapper imageWithCPPImage:new Image(width, height)];
}
// create an image from data
ImageWrapper *Image::createImage(uint8_t *imageData, int width, int height, bool ownsData) {
return [ImageWrapper imageWithCPPImage:new Image(imageData, width, height, ownsData)];
}
// take a source UIImage and convert it to greyscale
ImageWrapper *Image::createImage(UIImage *srcImage, int width, int height, bool imageIsRotatedBy90degrees) {
return [ImageWrapper imageWithCPPImage:new Image(srcImage, width, height, kCGInterpolationHigh, imageIsRotatedBy90degrees)];
}
void Image::extractConnectedRegion(int x, int y, std::vector<ImagePoint> *points) {
(*points).push_back(ImagePoint(x,y));
(*this)[y][x]=0;
int left, right;
left=x-1;
right=x+1;
while(left>=0 && (*this)[y][left]!=0) {
(*this)[y][left]=0;
(*points).push_back(ImagePoint(left,y));
left--;
}
while(right<m_width && (*this)[y][right]!=0) {
(*this)[y][right]=0;
(*points).push_back(ImagePoint(right,y));
right++;
}
for(int i=left; i<=right; i++) {
if(i>=0 && i<m_width) {
if(y>0 && (*this)[y-1][i]!=0) {
extractConnectedRegion(i, y-1, points);
}
if(y<(m_height-1) && (*this)[y+1][i]!=0) {
extractConnectedRegion(i, y+1, points);
}
}
}
}
inline int findThresholdAtPosition(int startx, int starty, int size, Image* src) {
int total=0;
for(int y=starty; y<starty+size; y++) {
for(int x=startx; x<startx+size; x++) {
total+=(*src)[y][x];
}
}
int threshold=total/(size*size);
return threshold;
};
/*
ImageWrapper* Image::autoLocalThreshold() {
const int local_size=10;
// now produce the thresholded image
Image *result=new Image(m_width, m_height);
// process the image
int threshold=0;
for(int y=local_size/2; y<m_height-local_size/2; y++) {
for(int x=local_size/2; x<m_width-local_size/2; x++) {
threshold=findThresholdAtPosition(x-local_size/2, y-local_size/2, local_size, this);
int val=(*this)[y][x];
if(val>threshold*0.9)
(*result)[y][x]=0;
else
(*result)[y][x]=255;
}
}
return [ImageWrapper imageWithCPPImage:result];
}
*/
ImageWrapper* Image::autoLocalThreshold() {
const int local_size=8;
// now produce the thresholded image
uint8_t *result=(uint8_t*) malloc(m_width*m_height);
// get the initial total
int total=0;
for(int y=0; y<local_size; y++) {
for(int x=0; x<local_size; x++) {
total+=(*this)[y][x];
}
}
// process the image
int lastIndex=m_width*m_height-(m_width*local_size/2+local_size/2);
for(int index=m_width*local_size/2+local_size/2; index<lastIndex; index++) {
int threshold=total/64; // 64 = local_size*local_size (8x8 window)
if(m_imageData[index]>threshold*0.9)
result[index]=0;
else
result[index]=255;
// calculate the new total
for(int index2=index-m_width*local_size/2-local_size/2; index2<index+m_width*local_size/2-local_size/2; index2+=m_width) {
total-=m_imageData[index2];
total+=m_imageData[index2+local_size];
}
}
return Image::createImage(result, m_width, m_height, true);
}
ImageWrapper *Image::autoThreshold() {
int total=0;
int count=0;
for(int y=0; y<m_height; y++) {
for(int x=0; x<m_width; x++) {
total+=(*this)[y][x];
count++;
}
}
int threshold=total/count;
Image *result=new Image(m_width, m_height);
for(int y=0; y<m_height; y++) {
for(int x=0; x<m_width; x++) {
if((*this)[y][x]>threshold*0.8) {
(*result)[y][x]=0;
} else {
(*result)[y][x]=255;
}
}
}
return [ImageWrapper imageWithCPPImage:result];
}
#define NOEDGE 255
#define POSSIBLE_EDGE 128
#define EDGE 0
void non_max_supp(int *mag, int *gradx, int *grady, int nrows, int ncols,
uint8_t *result)
{
int rowcount, colcount,count;
int *magrowptr,*magptr;
int *gxrowptr,*gxptr;
int *gyrowptr,*gyptr,z1,z2;
int m00,gx,gy;
float mag1,mag2,xperp,yperp;
uint8_t *resultrowptr, *resultptr;
/****************************************************************************
* Zero the edges of the result image.
****************************************************************************/
for(count=0,resultrowptr=result,resultptr=result+ncols*(nrows-1);
count<ncols; resultptr++,resultrowptr++,count++){
*resultrowptr = *resultptr = (unsigned char) 0;
}
for(count=0,resultptr=result,resultrowptr=result+ncols-1;
count<nrows; count++,resultptr+=ncols,resultrowptr+=ncols){
*resultptr = *resultrowptr = (unsigned char) 0;
}
/****************************************************************************
* Suppress non-maximum points.
****************************************************************************/
for(rowcount=1,magrowptr=mag+ncols+1,gxrowptr=gradx+ncols+1,
gyrowptr=grady+ncols+1,resultrowptr=result+ncols+1;
rowcount<nrows-2;
rowcount++,magrowptr+=ncols,gyrowptr+=ncols,gxrowptr+=ncols,
resultrowptr+=ncols){
for(colcount=1,magptr=magrowptr,gxptr=gxrowptr,gyptr=gyrowptr,
resultptr=resultrowptr;colcount<ncols-2;
colcount++,magptr++,gxptr++,gyptr++,resultptr++){
m00 = *magptr;
if(m00 == 0){
*resultptr = (unsigned char) NOEDGE;
}
else{
xperp = -(gx = *gxptr)/((float)m00);
yperp = (gy = *gyptr)/((float)m00);
}
if(gx >= 0){
if(gy >= 0){
if (gx >= gy)
{
/* 111 */
/* Left point */
z1 = *(magptr - 1);
z2 = *(magptr - ncols - 1);
mag1 = (m00 - z1)*xperp + (z2 - z1)*yperp;
/* Right point */
z1 = *(magptr + 1);
z2 = *(magptr + ncols + 1);
mag2 = (m00 - z1)*xperp + (z2 - z1)*yperp;
}
else
{
/* 110 */
/* Left point */
z1 = *(magptr - ncols);
z2 = *(magptr - ncols - 1);
mag1 = (z1 - z2)*xperp + (z1 - m00)*yperp;
/* Right point */
z1 = *(magptr + ncols);
z2 = *(magptr + ncols + 1);
mag2 = (z1 - z2)*xperp + (z1 - m00)*yperp;
}
}
else
{
if (gx >= -gy)
{
/* 101 */
/* Left point */
z1 = *(magptr - 1);
z2 = *(magptr + ncols - 1);
mag1 = (m00 - z1)*xperp + (z1 - z2)*yperp;
/* Right point */
z1 = *(magptr + 1);
z2 = *(magptr - ncols + 1);
mag2 = (m00 - z1)*xperp + (z1 - z2)*yperp;
}
else
{
/* 100 */
/* Left point */
z1 = *(magptr + ncols);
z2 = *(magptr + ncols - 1);
mag1 = (z1 - z2)*xperp + (m00 - z1)*yperp;
/* Right point */
z1 = *(magptr - ncols);
z2 = *(magptr - ncols + 1);
mag2 = (z1 - z2)*xperp + (m00 - z1)*yperp;
}
}
}
else
{
if ((gy = *gyptr) >= 0)
{
if (-gx >= gy)
{
/* 011 */
/* Left point */
z1 = *(magptr + 1);
z2 = *(magptr - ncols + 1);
mag1 = (z1 - m00)*xperp + (z2 - z1)*yperp;
/* Right point */
z1 = *(magptr - 1);
z2 = *(magptr + ncols - 1);
mag2 = (z1 - m00)*xperp + (z2 - z1)*yperp;
}
else
{
/* 010 */
/* Left point */
z1 = *(magptr - ncols);
z2 = *(magptr - ncols + 1);
mag1 = (z2 - z1)*xperp + (z1 - m00)*yperp;
/* Right point */
z1 = *(magptr + ncols);
z2 = *(magptr + ncols - 1);
mag2 = (z2 - z1)*xperp + (z1 - m00)*yperp;
}
}
else
{
if (-gx > -gy)
{
/* 001 */
/* Left point */
z1 = *(magptr + 1);
z2 = *(magptr + ncols + 1);
mag1 = (z1 - m00)*xperp + (z1 - z2)*yperp;
/* Right point */
z1 = *(magptr - 1);
z2 = *(magptr - ncols - 1);
mag2 = (z1 - m00)*xperp + (z1 - z2)*yperp;
}
else
{
/* 000 */
/* Left point */
z1 = *(magptr + ncols);
z2 = *(magptr + ncols + 1);
mag1 = (z2 - z1)*xperp + (m00 - z1)*yperp;
/* Right point */
z1 = *(magptr - ncols);
z2 = *(magptr - ncols - 1);
mag2 = (z2 - z1)*xperp + (m00 - z1)*yperp;
}
}
}
/* Now determine if the current point is a maximum point */
if ((mag1 > 0.0) || (mag2 > 0.0))
{
*resultptr = (unsigned char) NOEDGE;
}
else
{
if (mag2 == 0.0)
*resultptr = (unsigned char) NOEDGE;
else
*resultptr = (unsigned char) POSSIBLE_EDGE;
}
}
}
}
void follow_edges(uint8_t *edgemapptr, int *edgemagptr, short lowval,
int cols)
{
int *tempmagptr;
uint8_t *tempmapptr;
int i;
int x[8] = {1,1,0,-1,-1,-1,0,1},
y[8] = {0,1,1,1,0,-1,-1,-1};
for(i=0;i<8;i++){
tempmapptr = edgemapptr - y[i]*cols + x[i];
tempmagptr = edgemagptr - y[i]*cols + x[i];
if((*tempmapptr == POSSIBLE_EDGE) && (*tempmagptr > lowval)){
*tempmapptr = (unsigned char) EDGE;
follow_edges(tempmapptr,tempmagptr, lowval, cols);
}
}
}
void apply_hysteresis(int *mag, uint8_t *nms, int rows, int cols,
float tlow, float thigh, uint8_t *edge)
{
int r, c, pos, numedges, highcount, lowthreshold, highthreshold,hist[32768];
int maximum_mag;
/****************************************************************************
* Initialize the edge map to possible edges everywhere the non-maximal
* suppression suggested there could be an edge except for the border. At
* the border we say there can not be an edge because it makes the
* follow_edges algorithm more efficient to not worry about tracking an
* edge off the side of the image.
****************************************************************************/
for(r=0,pos=0;r<rows;r++){
for(c=0;c<cols;c++,pos++){
if(nms[pos] == POSSIBLE_EDGE) edge[pos] = POSSIBLE_EDGE;
else edge[pos] = NOEDGE;
}
}
for(r=0,pos=0;r<rows;r++,pos+=cols){
edge[pos] = NOEDGE;
edge[pos+cols-1] = NOEDGE;
}
pos = (rows-1) * cols;
for(c=0;c<cols;c++,pos++){
edge[c] = NOEDGE;
edge[pos] = NOEDGE;
}
/****************************************************************************
* Compute the histogram of the magnitude image. Then use the histogram to
* compute hysteresis thresholds.
****************************************************************************/
for(r=0;r<32768;r++) hist[r] = 0;
for(r=0,pos=0;r<rows;r++){
for(c=0;c<cols;c++,pos++){
if(edge[pos] == POSSIBLE_EDGE) hist[mag[pos]]++;
}
}
/****************************************************************************
* Compute the number of pixels that passed the nonmaximal suppression.
****************************************************************************/
for(r=1,numedges=0;r<32768;r++){
if(hist[r] != 0) maximum_mag = r;
numedges += hist[r];
}
highcount = (int)(numedges * thigh + 0.5);
/****************************************************************************
* Compute the high threshold value as the (100 * thigh) percentage point
* in the magnitude of the gradient histogram of all the pixels that passes
* non-maximal suppression. Then calculate the low threshold as a fraction
* of the computed high threshold value. John Canny said in his paper
* "A Computational Approach to Edge Detection" that "The ratio of the
* high to low threshold in the implementation is in the range two or three
* to one." That means that in terms of this implementation, we should
* choose tlow ~= 0.5 or 0.33333.
****************************************************************************/
r = 1;
numedges = hist[1];
while((r<(maximum_mag-1)) && (numedges < highcount)){
r++;
numedges += hist[r];
}
highthreshold = r;
lowthreshold = (int)(highthreshold * tlow + 0.5);
/*
if(VERBOSE){
printf("The input low and high fractions of %f and %f computed to\n",
tlow, thigh);
printf("magnitude of the gradient threshold values of: %d %d\n",
lowthreshold, highthreshold);
}
*/
/****************************************************************************
* This loop looks for pixels above the highthreshold to locate edges and
* then calls follow_edges to continue the edge.
****************************************************************************/
for(r=0,pos=0;r<rows;r++){
for(c=0;c<cols;c++,pos++){
if((edge[pos] == POSSIBLE_EDGE) && (mag[pos] >= highthreshold)){
edge[pos] = EDGE;
follow_edges((edge+pos), (mag+pos), lowthreshold, cols);
}
}
}
/****************************************************************************
* Set all the remaining possible edges to non-edges.
****************************************************************************/
for(r=0,pos=0;r<rows;r++){
for(c=0;c<cols;c++,pos++) if(edge[pos] != EDGE) edge[pos] = NOEDGE;
}
}
/*
tlow 0.20-0.50
thigh 0.60-0.90
*/
ImageWrapper *Image::cannyEdgeExtract(float tlow, float thigh) {
int gx[3][3]={
{ -1, 0, 1 },
{ -2, 0, 2 },
{ -1, 0, 1 }};
int gy[3][3]={
{ 1, 2, 1 },
{ 0, 0, 0 },
{ -1, -2, -1 }};
int resultWidth=m_width-3;
int resultHeight=m_height-3;
int *diffx=(int *) malloc(sizeof(int)*resultHeight*resultWidth);
int *diffy=(int *) malloc(sizeof(int)*resultHeight*resultWidth);
int *mag=(int *) malloc(sizeof(int)*resultHeight*resultWidth);
memset(diffx, 0, sizeof(int)*resultHeight*resultWidth);
memset(diffy, 0, sizeof(int)*resultHeight*resultWidth);
memset(mag, 0, sizeof(int)*resultHeight*resultWidth);
// compute the magnitute and the angles in the image
for(int y=0; y<m_height-3; y++) {
for(int x=0; x<m_width-3; x++) {
int resultX=0;
int resultY=0;
for(int dy=0; dy<3; dy++) {
for(int dx=0; dx<3; dx++) {
int pixel=(*this)[y+dy][x+dx];
resultX+=pixel*gx[dy][dx];
resultY+=pixel*gy[dy][dx];
}
}
mag[y*resultWidth+x]=abs(resultX)+abs(resultY);
diffx[y*resultWidth+x]=resultX;
diffy[y*resultWidth+x]=resultY;
}
}
uint8_t*nms=(uint8_t *) malloc(sizeof(uint8_t)*resultHeight*resultWidth);
memset(nms, 0, sizeof(uint8_t)*resultHeight*resultWidth);
non_max_supp(mag, diffx, diffy, resultHeight, resultWidth, nms);
free(diffx);
free(diffy);
uint8_t *edge=(uint8_t *) malloc(sizeof(uint8_t)*resultHeight*resultWidth);
memset(edge, 0, sizeof(uint8_t)*resultHeight*resultWidth);
apply_hysteresis(mag, nms, resultHeight, resultWidth, tlow, thigh, edge);
free(nms);
free(mag);
Image *result=new Image(edge, resultWidth, resultHeight, true);
return [ImageWrapper imageWithCPPImage:result];
}
// rotate by 90, 180, 270, 360
ImageWrapper *Image::rotate(int angle) {
Image* result;
switch(angle) {
case 90:
case 270:
result=new Image(m_height, m_width);
break;
case 180:
default: // 0 or 360 degrees - same dimensions as the source
result=new Image(m_width, m_height);
break;
}
for(int y=0; y< m_height; y++) {
for(int x=0; x<m_width; x++) {
switch(angle) {
case 90:
(*result)[m_width-x-1][y]=(*this)[y][x];
break;
case 180:
(*result)[m_height-y-1][x]=(*this)[y][x];
break;
case 270:
(*result)[x][y]=(*this)[y][x];
break;
default: // 0 or 360 degrees - straight copy
(*result)[y][x]=(*this)[y][x];
break;
}
}
}
return [ImageWrapper imageWithCPPImage:result];
}
ImageWrapper *Image::gaussianBlur() {
int blur[5][5]={
{ 1, 4, 7, 4, 1 },
{ 4,16,26,16, 4 },
{ 7,26,41,26, 7 },
{ 4,16,26,16, 4 },
{ 1, 4, 7, 4, 1 }};
Image *result=new Image(m_width-5, m_height-5);
for(int y=0; y<m_height-5; y++) {
for(int x=0; x<m_width-5; x++) {
int val=0;
for(int dy=0; dy<5; dy++) {
for(int dx=0; dx<5; dx++) {
int pixel=(*this)[y+dy][x+dx];
val+=pixel*blur[dy][dx];
}
}
(*result)[y][x]=val/273;
}
}
return [ImageWrapper imageWithCPPImage:result];
}
void Image::HistogramEqualisation() {
std::vector<int> pdf(256);
std::vector<int> cdf(256);
// compute the pdf
for(int i=0; i<m_height*m_width; i++) {
pdf[m_imageData[i]]++;
}
// compute the cdf
cdf[0]=pdf[0];
for(int i=1; i<256; i++) {
cdf[i]=cdf[i-1]+pdf[i];
}
// now map the pixels to the new values
for(int i=0; i<m_height*m_width; i++) {
m_imageData[i]=255*cdf[m_imageData[i]]/cdf[255];
}
}
UIImage *Image::toUIImage() {
// generate space for the result
uint8_t *result=(uint8_t *) calloc(m_width*m_height*sizeof(uint32_t),1);
// process the image back to rgb
for(int i=0; i<m_height*m_width; i++) {
result[i*4]=0;
int val=m_imageData[i];
result[i*4+1]=val;
result[i*4+2]=val;
result[i*4+3]=val;
}
// create a UIImage
CGColorSpaceRef colorSpace=CGColorSpaceCreateDeviceRGB();
CGContextRef context=CGBitmapContextCreate(result, m_width, m_height, 8, m_width*sizeof(uint32_t), colorSpace, kCGBitmapByteOrder32Little|kCGImageAlphaNoneSkipLast);
CGImageRef image=CGBitmapContextCreateImage(context);
CGContextRelease(context);
CGColorSpaceRelease(colorSpace);
UIImage *resultUIImage=[UIImage imageWithCGImage:image];
CGImageRelease(image);
// make sure the data will be released by giving it to an autoreleased NSData
[NSData dataWithBytesNoCopy:result length:m_width*m_height*sizeof(uint32_t)];
return resultUIImage;
}
inline float Interpolate1(float a, float b, float c) {
float mu=c-floor(c);
return(a*(1-mu)+b*mu);
}
inline float Interpolate2(float a, float b, float c, float d, float x, float y)
{
float ab = Interpolate1(a,b,x);
float cd = Interpolate1(c,d,x);
return Interpolate1(ab,cd,y);
}
ImageWrapper *Image::resize(int newX, int newY) {
Image *result=new Image(newX, newY);
for(float y=0; y<newY; y++) {
for(float x=0; x<newX; x++) {
float srcX0=x*(float)(m_width-1)/(float)newX;
float srcY0=y*(float)(m_height-1)/(float)newY;
float srcX1=(x+1)*(float)(m_width-1)/(float)newX;
float srcY1=(y+1)*(float)(m_height-1)/(float)newY;
float val=0,count=0;
for(float srcY=srcY0; srcY<srcY1; srcY++) {
for(float srcX=srcX0; srcX<srcX1; srcX++) {
val+=Interpolate2((*this)[(int)srcY][(int) srcX], (*this)[(int)srcY][(int) srcX+1],
(*this)[(int)srcY+1][(int) srcX], (*this)[(int)srcY+1][(int) srcX+1],
srcX, srcY);
count++;
}
}
(*result)[(int) y][(int) x]=val/count;
}
}
return [ImageWrapper imageWithCPPImage:result];
}
void Image::findLargestStructure(std::vector<ImagePoint> *maxPoints) {
// process the image
std::vector<ImagePoint> points;
points.reserve(10000);
for(int y=0; y<m_height; y++) {
for(int x=0; x<m_width; x++) {
// if we've found a point in the image then extract everything connected to it
if((*this)[y][x]!=0) {
extractConnectedRegion(x, y, &points);
if(points.size()>maxPoints->size()) {
maxPoints->clear();
maxPoints->resize(points.size());
std::copy(points.begin(), points.end(), maxPoints->begin());
}
points.clear();
}
}
}
}
int findHeightAtX(Image *img, int x) {
// find the top most set pixel
bool foundTop=false;
int topY=0;
for(;topY<img->getHeight(); topY++) {
if((*img)[topY][x]==0) {
foundTop=true;
break;
}
}
if(foundTop) {
// find the bottom most set pixel
int bottomY=img->getHeight()-1;
for(;bottomY>0 && (*img)[bottomY][x]==0; bottomY--);
return bottomY-topY;
}
return -1;
}
void Image::skeletonise() {
bool changes=true;
while(changes) {
changes=false;
for(int y=1; y<m_height-1; y++) {
for(int x=1; x<m_width-1; x++) {
if((*this)[y][x]!=0) {
bool val[8];
val[0]=(*this)[y-1][x-1]!=0;
val[1]=(*this)[y-1][x]!=0;
val[2]=(*this)[y-1][x+1]!=0;
val[3]=(*this)[y][x+1]!=0;
val[4]=(*this)[y+1][x+1]!=0;
val[5]=(*this)[y+1][x]!=0;
val[6]=(*this)[y+1][x-1]!=0;
val[7]=(*this)[y][x-1]!=0;
bool remove=false;
for(int i=0; i<7 && !remove;i++) {
remove=(val[(0+i)%8] && val[(1+i)%8] && val[(7+i)%8] && val[(6+i)%8] && val[(5+i)%8] && !(val[(2+i)%8] || val[(3+i)%8] || val[(4+i)%8]))
|| (val[(0+i)%8] && val[(1+i)%8] && val[(7+i)%8] && !(val[(3+i)%8] || val[(6+i)%8] || val[(5+i)%8] || val[(4+i)%8])) ||
!(val[(0+i)%8] || val[(1+i)%8] || val[(2+i)%8] || val[(3+i)%8] || val[(4+i)%8] || val[(5+i)%8] || val[(6+i)%8] || val[(7+i)%8]);
}
if(remove) {
(*this)[y][x]=0;
changes=true;
}
}
}
}
}
}
The Tesseract engine on its own does not give very good results unless you apply some image preprocessing first...
Here is what I did, and it got me to almost 95% accuracy...
Convert the UIImage to greyscale and apply autoLocalThreshold to it; for this I used a class I found online. Also keep in mind that if you resize the image you must preserve the aspect ratio, and that your image-editing code must be correct, because plenty of code out there will corrupt the image without you noticing. So here is some code in case it helps (a sketch of an aspect-ratio-preserving resize follows right after this paragraph).
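The resizeImage: call that appears in the usage snippet further down is not included in the answer. Purely as an illustration (the 640-pixel cap and the UIGraphics approach are my assumptions, not the original poster's code), an aspect-ratio-preserving resizeImage: might look like this:
// Hypothetical sketch of an aspect-ratio-preserving resize. The 640-pixel cap
// is an arbitrary assumption; the point is that width and height are scaled by
// the same factor so the glyphs are not distorted before OCR.
- (UIImage *) resizeImage:(UIImage *)image
{
    const CGFloat kMaxSide = 640.0;                      // assumed target size
    CGSize src = image.size;
    CGFloat scale = kMaxSide / MAX(src.width, src.height);
    if (scale >= 1.0) return image;                      // never enlarge here
    CGSize dst = CGSizeMake(round(src.width * scale), round(src.height * scale));
    UIGraphicsBeginImageContext(dst);                    // destination keeps the source aspect ratio
    [image drawInRect:CGRectMake(0, 0, dst.width, dst.height)];
    UIImage *resized = UIGraphicsGetImageFromCurrentImageContext();
    UIGraphicsEndImageContext();
    return resized;
}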
The Image class that does the greyscale conversion and automatic local thresholding...
/*
* Image.h
* ImageProcessing
*
*
*
*/
#import <UIKit/UIImage.h>
#include <vector>
class Image;
// objective C wrapper for our image class
@interface ImageWrapper : NSObject {
Image *image;
bool ownsImage;
}
@property(assign, nonatomic) Image *image;
@property(assign, nonatomic) bool ownsImage;
+ (ImageWrapper *) imageWithCPPImage:(Image *) theImage;
@end
class ImagePoint {
public:
short x,y;
inline ImagePoint(short xpos, short ypos) {
x=xpos;
y=ypos;
}
inline ImagePoint(int xpos, int ypos) {
x=xpos;
y=ypos;
}
inline ImagePoint(const ImagePoint &other) {
x=other.x;
y=other.y;
}
inline ImagePoint() {
x=0; y=0;
}
};
class Image {
private:
uint8_t *m_imageData;
uint8_t **m_yptrs;
int m_width;
int m_height;
bool m_ownsData;
Image(ImageWrapper *other, int x1, int y1, int x2, int y2);
Image(int width, int height);
Image(uint8_t *imageData, int width, int height, bool ownsData=false);
Image(UIImage *srcImage, int width, int height, CGInterpolationQuality interpolation, bool imageIsRotatedBy90degrees=false);
void initYptrs();
public:
// copy a section of another image
static ImageWrapper *createImage(ImageWrapper *other, int x1, int y1, int x2, int y2);
// create an empty image of the required width and height
static ImageWrapper *createImage(int width, int height);
// create an image from data
static ImageWrapper *createImage(uint8_t *imageData, int width, int height, bool ownsData=false);
// take a source UIImage and convert it to greyscale
static ImageWrapper *createImage(UIImage *srcImage, int width, int height, bool imageIsRotatedBy90degrees=false);
// edge detection
ImageWrapper *cannyEdgeExtract(float tlow, float thigh);
// local thresholding
ImageWrapper* autoLocalThreshold();
// threshold using integral
ImageWrapper *autoIntegratingThreshold();
// threshold an image automatically
ImageWrapper *autoThreshold();
// gaussian smooth the image
ImageWrapper *gaussianBlur();
// get the percent set pixels
int getPercentSet();
// exrtact a connected area from the image
void extractConnectedRegion(int x, int y, std::vector<ImagePoint> *points);
// find the largest connected region in the image
void findLargestStructure(std::vector<ImagePoint> *maxPoints);
// normalise an image
void normalise();
// rotate by 90, 180, 270, 360
ImageWrapper *rotate(int angle);
// shrink to a new size
ImageWrapper *resize(int newX, int newY);
ImageWrapper *shrinkBy2();
// histogram equalisation
void HistogramEqualisation();
// skeltonize
void skeletonise();
// convert back to a UIImage for display
UIImage *toUIImage();
~Image() {
if(m_ownsData)
free(m_imageData);
free(m_yptrs); // m_yptrs was allocated with malloc in initYptrs
}
inline uint8_t* operator[](const int rowIndex) {
return m_yptrs[rowIndex];
}
inline int getWidth() {
return m_width;
}
inline int getHeight() {
return m_height;
}
};
inline bool sortByX1(const ImagePoint &p1, const ImagePoint &p2) {
if(p1.x==p2.x) return p1.y<p2.y;
return p1.x<p2.x;
}
inline bool sortByY1(const ImagePoint &p1, const ImagePoint &p2) {
if(p1.y==p2.y) return p1.x<p2.x;
return p1.y<p2.y;
}
And this is how you use it...
[imageProcessing startTesseract];
UIImage *newImage = [imageProcessing resizeImage:image];
ImageWrapper *greyScale=Image::createImage(newImage, newImage.size.width, newImage.size.height);
ImageWrapper *edges=greyScale.image->autoLocalThreshold();
// show the results
NSString *text = [[NSString alloc]initWithString:[imageProcessing ocrImage:edges.image->toUIImage()]] ;
Here imageProcessing contains the method that starts up Tesseract and other related functionality... (startTesseract itself is not shown in this answer; a rough sketch of it follows after ocrImage below.)
// METHOD TO DECODE THE MICR COMPONENTS USING TESSERACT
- (NSString *) ocrImage: (UIImage *) capturedImage
{
CGSize imageSize = [capturedImage size];
double bytes_per_line = CGImageGetBytesPerRow([capturedImage CGImage]);
double bytes_per_pixel = CGImageGetBitsPerPixel([capturedImage CGImage]) / 8.0;
CFDataRef data = CGDataProviderCopyData(CGImageGetDataProvider([capturedImage CGImage]));
const UInt8 *imageData = CFDataGetBytePtr(data);
imageThresholder = new tesseract::ImageThresholder();
imageThresholder->SetImage(imageData,(int) imageSize.width,(int) imageSize.height,(int)bytes_per_pixel,(int)bytes_per_line);
// CONVERTING THE IMAGE INTO PIXELS
objForTesseract->SetImage(imageThresholder->GetPixRectGrey());
// GET THE DECODED TEXT
objForTesseract->Recognize(NULL);
char* text = objForTesseract->GetUTF8Text();
CFRelease(data);
delete imageThresholder;
imageThresholder=nil;
objForTesseract->End();
delete objForTesseract;
objForTesseract=nil;
NSString *result = [NSString stringWithCString:text encoding:NSUTF8StringEncoding];
delete [] text; // the buffer returned by GetUTF8Text must be delete[]d by the caller
return result;
}
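The startTesseract method called in the usage snippet is not shown in the answer. A minimal sketch of what it might do, assuming objForTesseract is a tesseract::TessBaseAPI* instance variable and that a tessdata folder ships inside the app bundle (both of these are my assumptions), could look like this:
// Hypothetical sketch, not the original poster's code: initialise Tesseract.
// Assumes an ivar  tesseract::TessBaseAPI *objForTesseract;  and a "tessdata"
// folder containing eng.traineddata inside the app bundle.
- (void) startTesseract
{
    NSString *dataPath = [[NSBundle mainBundle] resourcePath];
    // Tesseract expects TESSDATA_PREFIX (or the datapath argument) to point at
    // the directory that contains the tessdata folder.
    setenv("TESSDATA_PREFIX", [[dataPath stringByAppendingString:@"/"] UTF8String], 1);
    objForTesseract = new tesseract::TessBaseAPI();
    objForTesseract->Init([dataPath UTF8String], "eng"); // "eng" is an assumed language
}
Note that ocrImage above calls End() and deletes objForTesseract after every recognition, so with a sketch like this you would have to call startTesseract again before processing the next image.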
I will post the .mm of the Image class as a separate answer...
Found the answer in Tesseract's FAQ. Read the question: "Is there a minimum text size? (It won't read screen text!)".
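In practice (this gloss is mine, not part of the original answer) that FAQ entry means a 72 dpi screen-sized image usually has to be scaled up before Tesseract has enough pixels per character to work with. A rough sketch of such an upscale, with an arbitrarily chosen factor, might be:
// Hypothetical illustration, not from the answer: draw a small image into a
// larger greyscale bitmap before OCR so that each glyph gets more pixels.
// The scale factor (e.g. 3) is an arbitrary assumption and should be tuned by eye.
static UIImage *scaleUpForTesseract(UIImage *src, int factor)
{
    size_t w = (size_t)(src.size.width  * factor);
    size_t h = (size_t)(src.size.height * factor);
    CGColorSpaceRef gray = CGColorSpaceCreateDeviceGray();
    CGContextRef ctx = CGBitmapContextCreate(NULL, w, h, 8, w, gray, kCGImageAlphaNone);
    CGContextSetInterpolationQuality(ctx, kCGInterpolationHigh);
    CGContextDrawImage(ctx, CGRectMake(0, 0, w, h), [src CGImage]);
    CGImageRef scaledRef = CGBitmapContextCreateImage(ctx);
    UIImage *scaled = [UIImage imageWithCGImage:scaledRef];
    CGImageRelease(scaledRef);
    CGContextRelease(ctx);
    CGColorSpaceRelease(gray);
    return scaled;
}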
Hope it helps.
I am not sure it will give accurate output at just any resolution. I was trying the same thing yesterday: I grabbed images from the camera to get high-resolution input, and I also uploaded low-resolution images from the photo library, but it did not work. I think you will need to do some more digging here.
Check whether the OCR Api Service can help you. You can try its demo. There is a credentials issue with the demo; you need to supply your own, and I am not sure which credentials are required. See if this helps you.