android - 多语言Tesseract-ocr怎么做

Question

我已按照 YouTube 上 DemoImagetoText 的构建说明成功构建了 DemoImagetoText。接下来我想在这个应用的基础上实现多语言 OCR。要支持多语言 OCR，我应该怎么做，或者应该如何修改下面这段代码？

public class main  extends Activity {

private CropImageView mCropImageView;
Bitmap converted;
EditText textView;
private TessOCR mTessOCR;
private Uri mCropImageUri;
public static final String lang = "eng";
public static final String DATA_PATH = Environment.getExternalStorageDirectory().toString() + "/DemoOCR/";
private ProgressDialog mProgressDialog;

@Override
protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.a_main);
    textView = (EditText)findViewById(R.id.editText);

    mCropImageView = (CropImageView) findViewById(R.id.CropImageView);
    String[] paths = new String[] { DATA_PATH, DATA_PATH + "tessdata/" };

    for (String path : paths) {
        File dir = new File(path);
        if (!dir.exists()) {
            if (!dir.mkdirs()) {
                Log.v("Main", "ERROR: Creation of directory " + path + " on sdcard failed");
                break;
            } else {
                Log.v("Main", "Created directory " + path + " on sdcard");
            }
        }

    }
    if (!(new File(DATA_PATH + "tessdata/" + lang + ".traineddata")).exists()) {
        try {

            AssetManager assetManager = getAssets();

            InputStream in = assetManager.open(lang + ".traineddata");
            //GZIPInputStream gin = new GZIPInputStream(in);
            OutputStream out = new FileOutputStream(DATA_PATH
                    + "tessdata/" + lang + ".traineddata");

            // Transfer bytes from in to out
            byte[] buf = new byte[1024];
            int len;
            //while ((lenf = gin.read(buff)) > 0) {
            while ((len = in.read(buf)) > 0) {
                out.write(buf, 0, len);
            }
            in.close();
            //gin.close();
            out.close();

            // Log.v(TAG, "Copied " + lang + " traineddata");
        } catch (IOException e) {
            // Log.e(TAG, "Was unable to copy " + lang + " traineddata " + e.toString());
        }


    }
    mTessOCR =new TessOCR();
}

/**
 * On load image button click, start pick image chooser activity.
 */
public void onLoadImageClick(View view) {
    startActivityForResult(getPickImageChooserIntent(), 200);
}

/**
 * Crop the image and set it back to the cropping view.
 */

public void onCropImageClick(View view) {
   Bitmap cropped = mCropImageView.getCroppedImage(500, 500);
    if (cropped != null)
        mCropImageView.setImageBitmap(cropped);

    //mImage.setImageBitmap(converted);
   doOCR(convertColorIntoBlackAndWhiteImage(cropped) );

}

public void doOCR(final Bitmap bitmap) {
    if (mProgressDialog == null) {
        mProgressDialog = ProgressDialog.show(this, "Processing",
                "Please wait...", true);
        // mResult.setVisibility(V.ViewISIBLE);


    }
    else {
        mProgressDialog.show();
    }

    new Thread(new Runnable() {
        public void run() {

            final String result = mTessOCR.getOCRResult(bitmap).toLowerCase();


            runOnUiThread(new Runnable() {

                @Override
                public void run() {
                    // TODO Auto-generated method stub
                    if (result != null && !result.equals("")) {
                        String s = result.trim();
                        textView.setText(result);


                    }

                    mProgressDialog.dismiss();
                }

            });

        };
    }).start();


}
private Bitmap convertColorIntoBlackAndWhiteImage(Bitmap orginalBitmap) {
    ColorMatrix colorMatrix = new ColorMatrix();
    colorMatrix.setSaturation(0);

    ColorMatrixColorFilter colorMatrixFilter = new ColorMatrixColorFilter(
            colorMatrix);

    Bitmap blackAndWhiteBitmap = orginalBitmap.copy(
            Bitmap.Config.ARGB_8888, true);

    Paint paint = new Paint();
    paint.setColorFilter(colorMatrixFilter);

    Canvas canvas = new Canvas(blackAndWhiteBitmap);
    canvas.drawBitmap(blackAndWhiteBitmap, 0, 0, paint);

    return blackAndWhiteBitmap;
}
@Override
protected void onActivityResult(int requestCode, int resultCode, Intent data) {
    if (resultCode == Activity.RESULT_OK) {
        Uri imageUri = getPickImageResultUri(data);

        // For API >= 23 we need to check specifically that we have permissions to read external storage,
        // but we don't know if we need to for the URI so the simplest is to try open the stream and see if we get error.
        boolean requirePermissions = false;
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.M &&
                checkSelfPermission(Manifest.permission.READ_EXTERNAL_STORAGE) != PackageManager.PERMISSION_GRANTED &&
                isUriRequiresPermissions(imageUri)) {

            // request permissions and handle the result in onRequestPermissionsResult()
            requirePermissions = true;
            mCropImageUri = imageUri;
            requestPermissions(new String[]{Manifest.permission.READ_EXTERNAL_STORAGE}, 0);
        }

        if (!requirePermissions) {
            mCropImageView.setImageUriAsync(imageUri);
        }
    }
}

@Override
public void onRequestPermissionsResult(int requestCode, String permissions[], int[] grantResults) {
    if (mCropImageUri != null && grantResults.length > 0 && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
        mCropImageView.setImageUriAsync(mCropImageUri);
    } else {
        Toast.makeText(this, "Required permissions are not granted", Toast.LENGTH_LONG).show();
    }
}

/**
 * Create a chooser intent to select the source to get image from.<br/>
 * The source can be camera's (ACTION_IMAGE_CAPTURE) or gallery's (ACTION_GET_CONTENT).<br/>
 * All possible sources are added to the intent chooser.
 */
public Intent getPickImageChooserIntent() {

    // Determine Uri of camera image to save.
    Uri outputFileUri = getCaptureImageOutputUri();

    List<Intent> allIntents = new ArrayList<>();
    PackageManager packageManager = getPackageManager();

    // collect all camera intents
    Intent captureIntent = new Intent(MediaStore.ACTION_IMAGE_CAPTURE);
    List<ResolveInfo> listCam = packageManager.queryIntentActivities(captureIntent, 0);
    for (ResolveInfo res : listCam) {
        Intent intent = new Intent(captureIntent);
        intent.setComponent(new ComponentName(res.activityInfo.packageName, res.activityInfo.name));
        intent.setPackage(res.activityInfo.packageName);
        if (outputFileUri != null) {
            intent.putExtra(MediaStore.EXTRA_OUTPUT, outputFileUri);
        }
        allIntents.add(intent);
    }

    // collect all gallery intents
    Intent galleryIntent = new Intent(Intent.ACTION_GET_CONTENT);
    galleryIntent.setType("image/*");
    List<ResolveInfo> listGallery = packageManager.queryIntentActivities(galleryIntent, 0);
    for (ResolveInfo res : listGallery) {
        Intent intent = new Intent(galleryIntent);
        intent.setComponent(new ComponentName(res.activityInfo.packageName, res.activityInfo.name));
        intent.setPackage(res.activityInfo.packageName);
        allIntents.add(intent);
    }

    // the main intent is the last in the list (fucking android) so pickup the useless one
    Intent mainIntent = allIntents.get(allIntents.size() - 1);
    for (Intent intent : allIntents) {
        if (intent.getComponent().getClassName().equals("com.android.documentsui.DocumentsActivity")) {
            mainIntent = intent;
            break;
        }
    }
    allIntents.remove(mainIntent);

    // Create a chooser from the main intent
    Intent chooserIntent = Intent.createChooser(mainIntent, "Select source");

    // Add all other intents
    chooserIntent.putExtra(Intent.EXTRA_INITIAL_INTENTS, allIntents.toArray(new Parcelable[allIntents.size()]));

    return chooserIntent;
}

/**
 * Get URI to image received from capture by camera.
 */
private Uri getCaptureImageOutputUri() {
    Uri outputFileUri = null;
    File getImage = getExternalCacheDir();
    if (getImage != null) {
        outputFileUri = Uri.fromFile(new File(getImage.getPath(), "pickImageResult.jpeg"));
    }
    return outputFileUri;
}

/**
 * Get the URI of the selected image from {@link #getPickImageChooserIntent()}.<br/>
 * Will return the correct URI for camera and gallery image.
 *
 * @param data the returned data of the activity result
 */
public Uri getPickImageResultUri(Intent data) {
    boolean isCamera = true;
    if (data != null && data.getData() != null) {
        String action = data.getAction();
        isCamera = action != null && action.equals(MediaStore.ACTION_IMAGE_CAPTURE);
    }
    return isCamera ? getCaptureImageOutputUri() : data.getData();
}

/**
 * Test if we can open the given Android URI to test if permission required error is thrown.<br>
 */
public boolean isUriRequiresPermissions(Uri uri) {
    try {
        ContentResolver resolver = getContentResolver();
        InputStream stream = resolver.openInputStream(uri);
        stream.close();
        return false;
    } catch (FileNotFoundException e) {
        if (e.getCause() instanceof ErrnoException) {
            return true;
        }
    } catch (Exception e) {
    }
    return false;
}
}

现在，我更改了语言，例如 lang="eng+jpn"，并相应修改了训练数据的文件名（例如将 lang+".traineddata" 改为 "eng.traineddata"+"jpn.traineddata"），也已经把训练数据添加到了 assets 中，但输出仍然不是日语（jpn），而是英语（eng）。

然后我只把 lang="eng" 改为 lang="jpn" 这一种语言，但仍然不起作用，输出依旧是英文。

我应该怎么办？我不知道该如何解决这些问题。T^T 先谢谢了。

另外，我想知道 Simple OCR Android App Using Tesseract 和 tess-two 之间的区别。它们做的是同样的事情，但为什么使用它们的代码不一样？

我还想知道 leptonica 和 opencv 之间的区别。它们可以做同样的事情，为什么大部分 OCR 项目选择使用 leptonica 开发？

score 0 · Accepted Answer

首先，你需要下载已经训练好的数据文件。你可以从 https://github.com/tesseract-ocr/tessdata 下载它们，并将它们导入到你创建的文件夹中；然后，你需要把识别语言设置为你想要使用的那种语言。

android - 多语言Tesseract-ocr怎么做

1 回答 1

Related

Reference