1
4

3 回答 3

1

I'd approach it with a greedy algorithm. Something like this:

    // warning, untested
    public String Translate(String s, Dictionary<String, String> mapping)
    {
        String result = "";
        if (RecurTranslate(s, mapping, ref result))
            return result;

        throw new Exception("No translation");
    }

    private bool RecurTranslate(String s, Dictionary<String, String> mapping, ref String result)
    {
        if (s.Length == 0)
            return true;

        for (int prefixLen = s.Length; prefixLen > 0; --prefixLen)
        {
            String prefix = s.Substring(0, prefixLen);
            String trans;
            if (mapping.TryGetValue(prefix, out trans))
            {
                if (RecurTranslate(s.Substring(prefixLen), mapping, ref result))
                {
                    result = trans + result;
                    return true;
                }
            }
            else if (prefixLen == 1)
            {   // this branch allows a character to stand for itself
                if (RecurTranslate(s.Substring(prefixLen), mapping, ref result))
                {
                    result = prefix + result;
                    return true;
                }
            }
        }

        return false;
    }

This starts from the front, looking for the largest possible match. Depending on your data, other approaches might be better - say, going through the mapping in length order to find the longest match and splitting from there:

    private bool RecurTranslate2(String s, Dictionary<String, String> mapping, ref String result)
    {
        if (s.Length == 0)
            return true;

        foreach (var entry in mapping.Where(e => e.Key.Length <= s.Length).OrderByDescending(e => e.Key.Length))
        {
            if (s.Contains(entry.Key))
            {   // split into a before and after
                int idx = s.IndexOf(entry.Key);
                String before = s.Substring(0, idx);
                String after = s.Substring(idx + entry.Key.Length);
                String bRes = "", aRes = "";
                if (RecurTranslate2(before, mapping, ref bRes) && RecurTranslate2(after, mapping, ref aRes))
                {
                    result = aRes + entry.Value + bRes;
                    return true;
                }
            }
        }

        return false;
    }

Finally, you might play with combining these methods: use RecurTranslate2 until you get to some length threshold and then switch to RecurTranslate.

Responding to comment: See new else branch for failed lookup

于 2012-07-24T15:45:21.800 回答
0

Recursive version - O(2^(string.Length-1)) - dont run on larger strings

[Test]
    public void StringSplit()
    {

        var splits = AllPossibleSplits("hello".Select(c => c.ToString()).ToList());

        Assert.AreEqual(16, splits.Count);
        Assert.AreEqual("hello", splits.First().First());
        Assert.AreEqual("hello".Length, splits.Last().Count());
    }

    private List<List<string>> AllPossibleSplits(List<string> source)
    {

        if (source.Count == 0)
        {
            return new List<List<string>>();
        }
        if (source.Count == 1)
        {
            return new List<List<string>> { source };
        }
        var firstTwoJoined = new List<string> { source[0] + source[1] };
        firstTwoJoined.AddRange(source.Skip(2));

        var justFirst = new List<string> { source[0] };
        var withoutFirst = AllPossibleSplits(source.Skip(1).ToList());

        var result = new List<List<string>>();
        result.AddRange(AllPossibleSplits(firstTwoJoined));
        result.AddRange(withoutFirst.Select(list => justFirst.Concat(list).ToList()));

        return result;
    }
于 2012-07-24T09:33:39.797 回答
0

I would approach this problem differently, perhaps using phonemes. What you want is a process that converts

English text -> English phonemes -> Hindi representations of each phoneme (or combinations) -> Hindi transliterated string.

One open-source string to phoneme synthesizer is eSpeak. Using this method, you can get only the phonemes from the library (discard the wave buffers). Now scan the english phonemes and pick Hindi fragments from a map.

Note: You can choose any library that converts a given string in English to the standard phoneme map, eSpeak is just one that I have seen before

This method should yield better results and seems to me to be a "natural" solution to this problem.

Hope this helps!

于 2012-07-24T17:55:51.393 回答