1

我正在制作(试图制作)一个可以返回包含单词字符的数组的函数。它需要将一些双字母作为一个保存在数组中。我有一个包含双字母的数组。我有一个单词列表(有时很大)。现在的问题是它保存了第一个字母 2 次,它保存了数组中单词中的双字符,称为字母。该函数本身在一个循环中运行,该循环从一个 json 对象中获取单词。该功能是按照我能想到的方式制作的,但如果有更好的方法(显然)请告诉我如何。

功能:

var word = 'voorheen';
var doubles = ['aa', 'oo', 'ee', 'ie', 'oe', 'eu', 'uu', 'au', 'ou', 'ui', 'ng', 'ch',   'ij'];
var letters = getLetters(word, doubles);
console.log(letters);

function getLetters(word, doubles) { 
var letters = [];
var specials = [];
var indexes = [];
for(var s=0;s<doubles.length;s++) {
    if(word.indexOf(doubles[s]) != -1) {
        specials.push(doubles[s]); 
        indexes.push(word.indexOf(doubles[s])); 
        console.log('specials: ' + specials);           
        console.log('indexes: ' + indexes);          
    }
}   
for(var i=0;i<word.length;i++) { 
    if(specials.length>0) {
        for(var j=0;j<specials.length;j++) {
            if(i<indexes[j]) {
                letters.push(word[i]);  
                console.log('i: ' + i);
                console.log('j: ' + j);
                console.log('letter: ' + word[i]);
            }    
            if(i==indexes[j]) {
                letters.push(specials[j]);
                console.log('i: ' + i);
                console.log('j: ' + j);
                console.log('letter: ' + word[i]);                  
            }
            if(i>indexes[j] + specials[j].length) { 
                letters.push(word[i]);
                console.log('i: ' + i);
                console.log('j: ' + j);
                console.log('letter: ' + word[i]);                  
            }

        }
    }
    else {
        letters.push(word[i]);
    }       
}
return letters;
}   

字母的Chrome日志输出:

["v", "v", "oo", "o", "o", "r", "h", "h", "e", "ee", "e", "n"] 

虽然我想要:

 ["v", "oo", "r", "h", "ee", "n"] 
4

4 回答 4

2
var word = 'voorheen';
var doubles = ['aa','oo','ee','ie','oe','eu','uu','au','ou','ui','ng','ch','ij'];
var letters = word.match(new RegExp(doubles.join("|")+"|.","g")) || [];

正则表达式尽可能多地捕获(除非特别声明不贪婪),因此两个字母组合优先于.匹配任何单个字符的组合。

奖励积分,因为这可以扩展为包括任何长度的组合:p

于 2013-06-20T16:01:31.110 回答
1

这是一个应该在没有正则表达式和 POJS 的情况下工作的解决方案

Javascript

function stringToFormattedArray(string) {
    var doubles = ['aa', 'oo', 'ee', 'ie', 'oe', 'eu', 'uu', 'au', 'ou', 'ui', 'ng', 'ch', 'ij'],
        result = [],
        chars,
        pair;

    while (string) {
        pair = string.slice(0, 2);
        if (doubles.indexOf(pair) !== -1) {
            result.push(pair);
            string = string.slice(2);
        } else {
            result.push(pair.split("")[0]);
            string = string.slice(1);
        }
    }

    return result;
}

console.log(stringToFormattedArray("voorheen"));

输出

["v", "oo", "r", "h", "ee", "n"]

jsfiddle 上

注意:Array.prototype.indexOf可以通过 MDN 或es5_shim提供的填充。或者当然你可以手动遍历数组并执行===

更新:没有Array.prototype.indexOf

Javascript

function stringToFormattedArray(string) {
    var doubles = ['aa', 'oo', 'ee', 'ie', 'oe', 'eu', 'uu', 'au', 'ou', 'ui', 'ng', 'ch', 'ij'],
        length = doubles.length,
        result = [],
        chars,
        pair,
        i;

    while (string) {
        pair = string.slice(0, 2);

        i = 0;
        while (i < length) {
            if (pair === doubles[i]) {
                result.push(pair);
                string = string.slice(2);
                break;
            }

            i += 1;
        }

        if (i === length) {
            result.push(pair.split("")[0]);
            string = string.slice(1);
        }
    }

    return result;
}

console.log(stringToFormattedArray("voorheen"));

jsfiddle 上

更新:为了纯粹的兴趣,我创建了一个jsperf来测试特定字符串“vorheen”的正则表达式版本与上述版本。

于 2013-06-20T16:12:20.873 回答
1

Well, in your iteration over specials you're pushing the letters every time if it is not at the current indexes[j]. Since your specials has two members, every letter will get doubled up.

To fix that, you would need a flag whether the current letter should be pushed or not that is set during that loop. Btw, your approach with the indices is flawed anyway, as it doesn't cope with repeating doubles (e.g. oohoo). Better:

function getLetters(word, doubles) {
    var letters = [];
    for (var i=0; i<word.length; i++) {
        var next = word.slice(i, i+2);
        if (doubles.indexOf(next) >= 0) {
            letters.push(next);
            i++;
        } else
            letters.push(word.charAt(i));
    }
    return letters;
}

A regex-based matcher would be much simpler:

var word = 'voorheen',
    letters = word.match(/aa|oo|ee|ie|oe|eu|uu|au|ou|ui|ng|ch|ij|\S/g) || [];
于 2013-06-20T16:13:26.877 回答
0

I think you also make it too complicated.

Just try something as below : http://jsfiddle.net/jHjkQ/

var word = 'voorheen';
var doubles = ['aa', 'oo', 'ee', 'ie', 'oe', 'eu', 'uu', 'au', 'ou', 'ui', 'ng', 'ch',   'ij'];
var result = [];
for(var i=0; i < word.length;i++) {
    var nextI = i + 1;
    //as double it must be first if you going to implement triple add above this line...
    if (nextI < word.length && doubles.indexOf(word[i] + "" + word[nextI]) > -1) {
        result.push(doubles[doubles.indexOf(word[i] + "" + word[nextI])]);
        i++; //double ignore next one
    }
    else {
        result.push(word[i]);
    }
}

console.log(result);
于 2013-06-20T16:13:46.197 回答