0

这个问题是在采访中被问到的

假设您有一个单词词典:(如果您有 /usr/share/dict/words 则使用)。

给定一个单词(例如:cricket),给我字典中可以通过 n 次操作找到的所有单词。其中操作是以下之一:
添加
替换
删除

例如,如果只允许 1 次操作,让我们找出可以从“cricket”组成的所有单词。

{'word': 'clicket', 'op': ['replace']} {'word': 'crickey', 'op': ['replace']} {'word': 'crickety', 'op' :['加法']}等

我以我自己的格式打印,但你明白了要点。

这是我尝试过的

  1. 根据操作数计算所有可能序列的列表。
  2. 然后遍历列表并一一应用它们并存储字典中存在的单词。

这是蛮力解决方案。我想知道是否有有效的解决方案。以下是蛮力解决方案的代码

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;


public class SimilarWordDistance {

    Map<String,Boolean> dictionary = new HashMap<String,Boolean>();
    int ADDTION = -1;
    int REPLACE = 0;
    int DELETION = 1;

    /**
     * @param args
     * @throws IOException 
     */
    public static void main(String[] args) throws IOException {

        SimilarWordDistance swd = new SimilarWordDistance();
        swd.readDictionary();
        //swd.findSimilar("cricket", 1);
        swd.findSimilar("happiness", 3);
    }

    public void findSimilar(String word,int num) {
        int possibleOperations = (int) Math.pow(3 , num);
        Integer[][] operations = new Integer[possibleOperations][num];
        buildOperationsArray(num, possibleOperations, operations);
        List<String> l = new ArrayList<String>();
        l.add(word);
        Map<String,Integer[]> sols = new HashMap<String,Integer[]>();

        for(int i=0;i<operations.length;i++)
            applyOperation(operations[i],l,sols);

        Iterator<String> itr = sols.keySet().iterator();
        while(itr.hasNext()) {
            String n = itr.next();
            printSolution(sols.get(n), n);
        }
    }


    private void applyOperation(Integer[] operation,List<String> word,Map<String,Integer[]> sols) {
        List<String> possiblities = word;
         for(int i=0;i<operation.length;i++) {
            if(operation[i] == ADDTION) {
                List<String> temp = new ArrayList<String>();
                for(int j =0;j<possiblities.size();j++) {
                   temp.addAll(applyAdditionOperation(possiblities.get(j)));
                   //System.out.println(temp.size());
                }
                possiblities = temp;
            } 
            if(operation[i] == REPLACE) {
                List<String> temp = new ArrayList<String>();
                for(int j =0;j<possiblities.size();j++) {
                    temp.addAll(applyReplace(possiblities.get(j)));
                    //System.out.println(temp.size());
                 }
                possiblities = temp;
            }
            if(operation[i] == DELETION) {
                List<String> temp = new ArrayList<String>();
                for(int j =0;j<possiblities.size();j++) {
                    temp.addAll(applyDeletion(possiblities.get(j)));
                 }
                possiblities = temp;
            }
        }

        for(int i=0;i<possiblities.size() ;i++) {
            String w = possiblities.get(i);
            if(dictionary.containsKey(w)) {
                sols.put(w, operation);
            }
        }

    }

    protected void printSolution(Integer[] operation, String w) {
        System.out.print(w+"\t" );
        for(int j=0;j<operation.length;j++) {
            System.out.print(printOperation(operation[j])+"\t");
        }
        System.out.println();
    }

    private String printOperation(Integer integer) {
        if(integer == ADDTION) {
            return "Addition";
        } if(integer == REPLACE) {
            return "Replace";
        } else {
            return "Deletion";
        }
    }

    private List<String> applyAdditionOperation(String word) {
        char[] possiblities = {'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','y','z'};
        List<String> possibleWords = new ArrayList<String>();
        for(int i=0;i<possiblities.length;i++) {
            for(int j=0;j<word.length();j++) {
                String temp = insertAt(word,j,possiblities[i]);
                possibleWords.add(temp);
            }
        }
        return possibleWords;
    }

    private List<String> applyDeletion(String word) {
        List<String> possibleWord = new ArrayList<String>();
        for(int i=0;i<word.length();i++) {
            String prefix = word.substring(0,i);
            String suffix = word.substring(i+1,word.length());
            String tenp = prefix+suffix;
            possibleWord.add(tenp);
        }
        return possibleWord;
    }

    private List<String> applyReplace(String word) {
        char[] possiblities = {'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','y','z'};
        List<String> possibleWord = new ArrayList<String>();
        for(int i=0;i<possiblities.length;i++) {
            for(int j=0;j<word.length();j++) {
                String temp = word.substring(0,j)+possiblities[i]+word.substring(j+1,word.length());
                if(temp.length()!=word.length()) 
                    System.out.println("#####################");
                possibleWord.add(temp);
            }
        }
        return possibleWord;
    }

    private String insertAt(String word, int j, char c) {
        String prefix = word.substring(0,j);
        String suffix = word.substring(j+1,word.length());
        String ret = prefix+c+suffix;
        return ret;
    }

    protected void buildOperationsArray(int num, int possibleOperations,
            Integer[][] operations) {
        for(int i=0;i<possibleOperations;i=i+9){
            for(int j=0;j<num;j++) {
                fillPossiblities(num, operations, ADDTION, i, j); // 3 rows
                if(i+3<possibleOperations)
                    fillPossiblities(num, operations, REPLACE, i+3, j); // 3 rows
                if(i+6 < possibleOperations)
                fillPossiblities(num, operations, DELETION, i+6, j);  // 3 rows
            }
        }
       /* System.out.println(operations.length);
        for(int i=0;i<operations.length;i++) {
            for(int j=0;j<operations[0].length;j++) {
                System.out.print(operations[i][j]+"\t");
            }
            System.out.println();
        }*/
    }


    /**
     * Every time this method is called it will fill all the colums of the passed row
     * with 1 default value and the fill the next 2 rows with possible permutation of that
     * column
     * @param num
     * @param operations
     * @param def
     * @param curRow
     */
    protected void fillPossiblities(int num, Integer[][] operations,int def,int curRow,int curColumn) {
        for(int i=0;i<num;i++) {
            operations[curRow][i] = def;
        }
        for(int i=0;i<num;i++) {
            if(i!=curColumn)
                operations[curRow+1][i] = def;
        }
        operations[curRow+1][curColumn] = getNext(def); //
        int def1 = getNext(def);
        for(int i=0;i<num;i++) {
            if(i!=curColumn)
                operations[curRow+2][i] = def;
        }
        operations[curRow+2][curColumn] = getNext(def1);
    }

    private int getNext(int def) {
        if(def == -1) {
            return REPLACE;
        }
        if(def == 0) {
            return DELETION;
        } else {
            return ADDTION;
        }
    }

    public void readDictionary() throws IOException {

        BufferedReader in = new BufferedReader(new FileReader("C:\\Documents\\Downloads\\words"));

        while (in.ready()) {
          String s = in.readLine();
          dictionary.put(s, true);
        }
        in.close();
    }

}
4

2 回答 2

1
For each word in the-dictionary
   d = minimum-edit-distance (given-word, word)
   if d <= n
      print (word)

最小编辑距离可以通过一个著名的动态规划算法来解决,其中复杂度O(n*m)n两个m词的长度。

维基百科文章有实现:http ://en.wikipedia.org/wiki/Levenshtein_distance

于 2013-11-11T11:27:23.007 回答
0

一种解决方案是您可以修改字典数据结构并以图形的形式表示它。

图的每个节点将代表一个单词。如果一个单词与另一个单词相差一个字母,则从一个节点到另一个节点会有一条边。

在您的情况下,“cricket”和“crickets”之间可能存在一个节点。

一旦字典被加载到这个表单中,之后查询这些操作产生的单词将直接连接到 cricket 的节点。

于 2013-11-11T11:33:26.383 回答