我需要比较两个字符串:只要它们至少有一个相同的单词,就给出一条成功消息。
// Sample inputs for the question: two strings to be compared word by word.
var str1 = "Hello World",
    str2 = "world is beautiful";
我需要匹配/比较这两个字符串:单词 "world" 在这两个字符串中都出现了,所以我需要打印一条成功消息。我该怎么做?
我需要比较两个字符串:只要它们至少有一个相同的单词,就给出一条成功消息。
// The two example strings whose words are to be compared.
var str1 = "Hello World",
    str2 = "world is beautiful";
我需要匹配/比较这两个字符串:单词 "world" 在这两个字符串中都出现了,所以我需要打印一条成功消息。我该怎么做?
以下代码将输出两个字符串中的所有匹配词:
// Log every word that occurs in both str1 and str2, ignoring letter case.
// Splitting on whitespace produces an empty token when a string is empty or
// starts/ends with whitespace; filter(Boolean) drops those tokens so that ""
// is never reported as a word shared by both strings.
var words1 = str1.split(/\s+/).filter(Boolean),
    words2 = str2.split(/\s+/).filter(Boolean),
    i,
    j;

for (i = 0; i < words1.length; i++) {
  for (j = 0; j < words2.length; j++) {
    // Case-insensitive comparison; the spelling from str1 is what gets printed.
    if (words1[i].toLowerCase() === words2[j].toLowerCase()) {
      console.log('word '+words1[i]+' was found in both strings');
    }
  }
}
您可以先对每个单词列表进行排序并消除重复项,从而避免将一个列表中的所有单词与另一个列表中的所有单词逐一比较。以下代码改编自 bjornd 的答案:
// Collect into `both` the lower-cased words that occur in both str1 and str2.
// Empty tokens (produced by split when a string is empty or has leading or
// trailing whitespace) are dropped so that "" is never counted as a word.
var words1 = str1.split(/\s+/).filter(Boolean),
    words2 = str2.split(/\s+/).filter(Boolean);

// Maps each lower-cased word to a small bitmask:
//   bit 1 -> the word occurs in words1, bit 2 -> the word occurs in words2.
// The object has no prototype, so words such as "constructor" or "__proto__"
// are stored like any other key.
var allwords = Object.create(null);

// set bit 1 for all words in words1
for (var wordid = 0; wordid < words1.length; ++wordid) {
  allwords[words1[wordid].toLowerCase()] = 1;
}

// set bit 2 for all words in words2
for (var wordid = 0; wordid < words2.length; ++wordid) {
  var low = words2[wordid].toLowerCase();
  // Start from the mask already stored (0 when the word is new) so that a
  // word previously marked by words1 ends up with the value 3.
  allwords[low] = (allwords[low] || 0) | 2;
}

// now those seen in both lists have value 3, the rest either 1 or 2.
var both = [];
for (var prop in allwords) {
  if (allwords[prop] === 3) {
    both.push(prop);
  }
}
这个版本应该相当高效,因为我们使用字典/散列结构来存储每组单词的信息。就 JavaScript 语句本身而言,整个过程是 O(n),但字典插入不可避免地不是常数时间,所以在实践中应预期大约为 O(n log n)。如果您只关心是否至少有一个单词匹配,可以在第二个 for 循环中提前退出;按原样,这段代码会找出所有匹配项。
这大致相当于对两个列表进行排序,将每个列表去重为唯一的单词,然后在两个列表中查找成对出现的单词。在 C++ 等语言中,您可以用两个集合(set)来完成,这样无需使用字典,并且排序之后的比较是 O(n)。下面用 Python 来说明,因为它易于阅读:
# Distinct lower-cased words of each string (split on whitespace).
words1 = {word.lower() for word in str1.split()}
words2 = {word.lower() for word in str2.split()}
# Words that appear in both strings.
common = words1.intersection(words2)
这里的排序(与任何集合一样)发生在向集合中插入元素时,对 n 个单词而言是 O(n log n);随后的交集运算 (&) 对于长度为 m 的集合是高效的 O(m)。
只需使用真正的位掩码调整@Phil H 的代码:
/**
 * Builds a dictionary from each lower-cased word to a bitmask in which bit i
 * is set when the word occurs in lists[i].
 *
 * Bit operations in JavaScript work on 32-bit integers, hence at most 32 lists.
 * Empty tokens (from leading/trailing whitespace or an empty string) are skipped.
 * The dictionary has no prototype, so a word such as "__proto__" is stored like
 * any other key.
 *
 * @param {string[]} lists - the strings to split into words
 * @returns {Object} prototype-less map: word -> bitmask
 */
function collectWordMasks(lists) {
  var masks = Object.create(null);
  for (var i = 0; i < lists.length; i++) {
    var words = lists[i].toLowerCase().split(/\s+/),
        bit = 1 << i;
    for (var j = 0, l = words.length; j < l; j++) {
      var word = words[j];
      if (word === "") {
        continue;
      }
      // undefined | bit === bit, so a new word needs no special case
      masks[word] |= bit;
    }
  }
  return masks;
}

/**
 * Returns the words whose bitmask has every bit of `filter` set.
 *
 * @param {Object} masks - map word -> bitmask, as built by collectWordMasks
 * @param {number} filter - bits that must all be present
 * @returns {string[]} matching words
 */
function wordsMatchingMask(masks, filter) {
  // Object.keys visits own properties only, unlike a bare for...in
  return Object.keys(masks).filter(function (word) {
    return (masks[word] & filter) === filter;
  });
}

var strings = ["Hello World", "world is beautiful"]; // up to 32 word lists
var occurrences = collectWordMasks(strings);
// now lets do a match for all words which are both in strings[0] and strings[1]
var filter = 3; // 1<<0 | 1<<1
var result = wordsMatchingMask(occurrences, filter);
我刚刚在 WriteCodeOnline 上试过这段代码,它在那里可以正常运行:
var s1 = "hello world, this is me";
var s2 = "I am tired of this world and I want to get off";
// Join both strings with ";" so that one regex can require the same word
// (back-reference \1, case-insensitive) on each side of the separator.
// NOTE(review): if s1 itself contains ";", a word repeated inside s1 alone can
// satisfy the pattern; pick a separator that cannot occur in the inputs.
var s1s2 = s1 + ";" + s2;
var captures = /\b(\w+)\b.*;.*\b\1\b/i.exec(s1s2);
// exec() returns null when nothing matches, so test the result itself rather
// than captures[1] (indexing null would throw before the else branch runs).
if (captures !== null)
{
    document.write(captures[1] + " occurs in both strings");
}
else
{
    document.write("no match in both strings");
}
好的,简单的方法:
/**
 * Tells whether at least one word of `a` occurs as a whole word in `b`,
 * ignoring letter case. A "word" is a run of \w characters.
 *
 * @param {string} a - string whose words are looked for
 * @param {string} b - string that is searched
 * @returns {boolean} true when some word of `a` is also a word of `b`
 */
function isMatching(a, b)
{
    var words = a.match(/\w+/g);
    // match() returns null when `a` contains no word characters at all
    if (words === null)
    {
        return false;
    }
    // The words consist of \w characters only, so they need no regex escaping.
    return new RegExp("\\b(" + words.join('|') + ")\\b", "gi").test(b);
}
isMatching("in", "pin"); // false
isMatching("Everything is beautiful, in its own way", "Every little thing she does is magic"); // true
isMatching("Hello World", "world is beautiful"); // true
……明白了吗?
我基本上是把 "Hello, World!" 转换
为正则表达式 /\b(Hello|World)\b/gi
这样的事情也可以:
/**
 * Tells whether str1 and str2 share at least one whole word, ignoring case.
 * A "word" is a run of \w characters; partial hits such as "or" inside
 * "world" do not count.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {boolean} true when some word occurs in both strings
 */
var isMatching = function(str1, str2) {
  var words1 = str1.toLowerCase().match(/\w+/g),
      words2 = str2.toLowerCase().match(/\w+/g);
  // match() returns null when a string has no word characters
  if (words1 === null || words2 === null) return false;

  // Prototype-less lookup table of the words of str2
  var seen = Object.create(null);
  for (var k = 0; k < words2.length; k++) {
    seen[words2[k]] = true;
  }
  for (var i = 0; i < words1.length; i++) {
    if (seen[words1[i]] === true) return true;
  }
  return false;
};
var str1 = "Hello World";
var str2 = "world is beautiful";
isMatching(str1, str2); // returns true
isMatching(str1, 'lorem ipsum'); // returns false