0

我有一个关于两个字符串的部分匹配的问题。

我有一个字符串,需要对它进行验证。更具体地说,我有一段 OCR 识别的输出,其中难免包含一些错误。我需要检查某个字符串是否确实存在于其中,但由于它可能被识别错误,我只要求达到 70% 的匹配度。

是否可以在 UiPath 中做到这一点?该字符串位于记事本(.txt)中,因此任何想法都会有所帮助。

4

1 回答 1

0

尝试把 OCR 输出中检测到的单词(words_detected)和要查找的基准词一起传给下面的 Search 函数。(参数 fuzzyness 是 double 类型,取值范围为 0–1;例如要求 70% 匹配时传入 0.7。)

// Declared here because Search calls it before its definition further down.
int LevenshteinDistance(std::string src, std::string dest);

/// Finds the entries of `wordList` that best match `word`.
///
/// Each candidate gets a similarity score of
/// `1 - LevenshteinDistance(word, candidate) / max(len(word), len(candidate))`,
/// so 1.0 means identical and 0.0 means nothing in common. Only candidates
/// scoring strictly above `fuzzyness` are considered; among those, the ones
/// with the highest score are returned.
///
/// @param word      The (possibly misspelled) text to look up.
/// @param wordList  Candidate words to compare against.
/// @param fuzzyness Minimum similarity, expected in the range 0-1
///                  (e.g. 0.7 for a 70% match). The bound is exclusive.
/// @return Empty list if nothing scores above `fuzzyness`; otherwise the
///         best-scoring candidate, or every candidate tied for the best
///         score, in their `wordList` order.
std::list<std::string> Search(std::string word, std::list<std::string> wordList, double fuzzyness) {
    std::list<std::string> foundWords;
    double bestScore = 0.0;  // only meaningful while foundWords is non-empty

    for (const std::string& candidate : wordList) {
        const auto longest = std::max(word.length(), candidate.length());

        // Two empty strings are identical; without this guard the score
        // would be 0/0 (NaN) and the pair could never match.
        const double score =
            (longest == 0)
                ? 1.0
                : 1.0 - static_cast<double>(LevenshteinDistance(word, candidate)) /
                            static_cast<double>(longest);

        if (!(score > fuzzyness)) continue;

        if (foundWords.empty() || score > bestScore) {
            // Strictly better than everything seen so far: start over.
            foundWords.clear();
            foundWords.push_back(candidate);
            bestScore = score;
        } else if (score == bestScore) {
            // Exact comparison is intentional: equal distance/length ratios
            // produce bit-identical doubles, so ties are detected reliably.
            foundWords.push_back(candidate);
        }
    }

    // Narrowing to the best score in the same pass replaces the former
    // "raise the threshold and search again" loop, which re-entered Search
    // with unchanged arguments and therefore never terminated.
    return foundWords;
}

/// Computes the edit distance between two strings.
///
/// Inserting, deleting or replacing one character costs one edit, and so
/// does swapping two adjacent characters (e.g. "ab" -> "ba").
///
/// @param src  First string.
/// @param dest Second string.
/// @return The number of edits needed to turn `src` into `dest`;
///         0 when the strings are equal.
int LevenshteinDistance(std::string src, std::string dest) {
    const int rows = static_cast<int>(src.size());
    const int cols = static_cast<int>(dest.size());

    // table[r][c] holds the distance between the first r characters of src
    // and the first c characters of dest.
    std::vector<std::vector<int>> table(rows + 1, std::vector<int>(cols + 1, 0));

    // Reaching or leaving the empty prefix takes one edit per character.
    for (int r = 0; r <= rows; ++r) table[r][0] = r;
    for (int c = 0; c <= cols; ++c) table[0][c] = c;

    for (int r = 1; r <= rows; ++r) {
        for (int c = 1; c <= cols; ++c) {
            const int cost = (src[r - 1] == dest[c - 1]) ? 0 : 1;

            const int viaDelete = table[r - 1][c] + 1;
            const int viaInsert = table[r][c - 1] + 1;
            const int viaReplace = table[r - 1][c - 1] + cost;
            int best = std::min(viaDelete, std::min(viaInsert, viaReplace));

            // The last two characters of each prefix are each other's mirror
            // image, so an adjacent swap may be cheaper.
            const bool adjacentSwap = r > 1 && c > 1 &&
                                      src[r - 1] == dest[c - 2] &&
                                      src[r - 2] == dest[c - 1];
            if (adjacentSwap) {
                best = std::min(best, table[r - 2][c - 2] + cost);
            }

            table[r][c] = best;
        }
    }

    return table[rows][cols];
}
于 2018-06-11T20:13:55.540 回答