该算法易于实现。例如,这是我用 Java 编写的一个快速实现:
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
public class StrikeAMatch {
protected int numPairs(final String string) {
return Math.max(0, string.length() - 1);
}
protected Map<String, Integer> pairs(final String string) {
if (string.isEmpty()) {
return new HashMap<>(0);
}
final Map<String, Integer> pairs = new HashMap(2 * numPairs(string));
for (int i = 1, j = string.length(); i < j; i += 1) {
final char a = string.charAt(i - 1);
final char b = string.charAt(i);
final String pair = String.format("%c%c", a, b);
if (pairs.containsKey(pair)) {
pairs.put(pair, 1 + pairs.get(pair));
}
else {
pairs.put(pair, 1);
}
}
return pairs;
}
protected int intersectionSize(
final Map<String, Integer> lhsPairs,
final Map<String, Integer> rhsPairs) {
final Set<String> lhsKeys = lhsPairs.keySet();
final Set<String> rhsKeys = rhsPairs.keySet();
final Set<String> pairIntersection = new HashSet<>(lhsKeys);
pairIntersection.retainAll(rhsKeys);
int sharedPairs = 0;
for (final String pair : pairIntersection) {
sharedPairs += Math.min(lhsPairs.get(pair), rhsPairs.get(pair));
}
return sharedPairs;
}
public double between(final String lhs, final String rhs) {
if (lhs.isEmpty() && rhs.isEmpty()) {
return 1.0;
}
final Map<String, Integer> lhsPairs = pairs(lhs.toUpperCase());
final Map<String, Integer> rhsPairs = pairs(rhs.toUpperCase());
return (2.0 * intersectionSize(lhsPairs, rhsPairs))
/ (numPairs(lhs) + numPairs(rhs));
}
public static void main(final String[] args) {
final StrikeAMatch distance = new StrikeAMatch();
for (final String lhs : args) {
for (final String rhs : args) {
System.out.printf("d(\"%s\", \"%s\") = [%.7f]%n",
lhs, rhs, distance.between(lhs, rhs));
}
}
}
}
您甚至可以填充第一个和最后一个字符,以将其扩展到一个字符或零字符的术语:
import java.util.HashMap;
import java.util.Map;
public class PaddedStrikeAMatch extends StrikeAMatch {
protected int numPairs(final String string) {
return string.length() + 1;
}
protected Map<String, Integer> pairs(final String string) {
if (string.isEmpty()) {
final Map<String, Integer> pairs = new HashMap<>(1);
pairs.put(" ", 1);
return pairs;
}
final Map<String, Integer> pairs = new HashMap(2 * numPairs(string));
pairs.put(String.format(" %c", string.charAt(0)), 1);
pairs.put(String.format("%c ", string.charAt(string.length() - 1)), 1);
for (int i = 1, j = string.length(); i < j; i += 1) {
final char a = string.charAt(i - 1);
final char b = string.charAt(i);
final String pair = String.format("%c%c", a, b);
if (pairs.containsKey(pair)) {
pairs.put(pair, 1 + pairs.get(pair));
}
else {
pairs.put(pair, 1);
}
}
return pairs;
}
public static void main(final String[] args) {
final StrikeAMatch distance = new PaddedStrikeAMatch();
for (final String lhs : args) {
for (final String rhs : args) {
System.out.printf("d(\"%s\", \"%s\") = [%.7f]%n",
lhs, rhs, distance.between(lhs, rhs));
}
}
}
}
为了验证它,以下是您提供的链接中提出的示例:
% java StrikeAMatch France French
d("France", "France") = [1.0000000]
d("France", "French") = [0.4000000]
d("French", "France") = [0.4000000]
d("French", "French") = [1.0000000]
% java StrikeAMatch Healed Sealed Healthy Heard Herded Help Sold
d("Healed", "Healed") = [1.0000000]
d("Healed", "Sealed") = [0.8000000]
d("Healed", "Healthy") = [0.5454545]
d("Healed", "Heard") = [0.4444444]
d("Healed", "Herded") = [0.4000000]
d("Healed", "Help") = [0.2500000]
d("Healed", "Sold") = [0.0000000]
d("Sealed", "Healed") = [0.8000000]
d("Sealed", "Sealed") = [1.0000000]
d("Sealed", "Healthy") = [0.3636364]
d("Sealed", "Heard") = [0.2222222]
d("Sealed", "Herded") = [0.2000000]
d("Sealed", "Help") = [0.0000000]
d("Sealed", "Sold") = [0.0000000]
d("Healthy", "Healed") = [0.5454545]
d("Healthy", "Sealed") = [0.3636364]
d("Healthy", "Healthy") = [1.0000000]
d("Healthy", "Heard") = [0.4000000]
d("Healthy", "Herded") = [0.1818182]
d("Healthy", "Help") = [0.2222222]
d("Healthy", "Sold") = [0.0000000]
d("Heard", "Healed") = [0.4444444]
d("Heard", "Sealed") = [0.2222222]
d("Heard", "Healthy") = [0.4000000]
d("Heard", "Heard") = [1.0000000]
d("Heard", "Herded") = [0.4444444]
d("Heard", "Help") = [0.2857143]
d("Heard", "Sold") = [0.0000000]
d("Herded", "Healed") = [0.4000000]
d("Herded", "Sealed") = [0.2000000]
d("Herded", "Healthy") = [0.1818182]
d("Herded", "Heard") = [0.4444444]
d("Herded", "Herded") = [1.0000000]
d("Herded", "Help") = [0.2500000]
d("Herded", "Sold") = [0.0000000]
d("Help", "Healed") = [0.2500000]
d("Help", "Sealed") = [0.0000000]
d("Help", "Healthy") = [0.2222222]
d("Help", "Heard") = [0.2857143]
d("Help", "Herded") = [0.2500000]
d("Help", "Help") = [1.0000000]
d("Help", "Sold") = [0.0000000]
d("Sold", "Healed") = [0.0000000]
d("Sold", "Sealed") = [0.0000000]
d("Sold", "Healthy") = [0.0000000]
d("Sold", "Heard") = [0.0000000]
d("Sold", "Herded") = [0.0000000]
d("Sold", "Help") = [0.0000000]
d("Sold", "Sold") = [1.0000000]
...这是填充版本:
% java PaddedStrikeAMatch France French
d("France", "France") = [1.0000000]
d("France", "French") = [0.4285714]
d("French", "France") = [0.4285714]
d("French", "French") = [1.0000000]
% java PaddedStrikeAMatch Healed Sealed Healthy Heard Herded Help Sold
d("Healed", "Healed") = [1.0000000]
d("Healed", "Sealed") = [0.7142857]
d("Healed", "Healthy") = [0.5333333]
d("Healed", "Heard") = [0.6153846]
d("Healed", "Herded") = [0.5714286]
d("Healed", "Help") = [0.3333333]
d("Healed", "Sold") = [0.1666667]
d("Sealed", "Healed") = [0.7142857]
d("Sealed", "Sealed") = [1.0000000]
d("Sealed", "Healthy") = [0.2666667]
d("Sealed", "Heard") = [0.3076923]
d("Sealed", "Herded") = [0.2857143]
d("Sealed", "Help") = [0.0000000]
d("Sealed", "Sold") = [0.3333333]
d("Healthy", "Healed") = [0.5333333]
d("Healthy", "Sealed") = [0.2666667]
d("Healthy", "Healthy") = [1.0000000]
d("Healthy", "Heard") = [0.4285714]
d("Healthy", "Herded") = [0.2666667]
d("Healthy", "Help") = [0.3076923]
d("Healthy", "Sold") = [0.0000000]
d("Heard", "Healed") = [0.6153846]
d("Heard", "Sealed") = [0.3076923]
d("Heard", "Healthy") = [0.4285714]
d("Heard", "Heard") = [1.0000000]
d("Heard", "Herded") = [0.6153846]
d("Heard", "Help") = [0.3636364]
d("Heard", "Sold") = [0.1818182]
d("Herded", "Healed") = [0.5714286]
d("Herded", "Sealed") = [0.2857143]
d("Herded", "Healthy") = [0.2666667]
d("Herded", "Heard") = [0.6153846]
d("Herded", "Herded") = [1.0000000]
d("Herded", "Help") = [0.3333333]
d("Herded", "Sold") = [0.1666667]
d("Help", "Healed") = [0.3333333]
d("Help", "Sealed") = [0.0000000]
d("Help", "Healthy") = [0.3076923]
d("Help", "Heard") = [0.3636364]
d("Help", "Herded") = [0.3333333]
d("Help", "Help") = [1.0000000]
d("Help", "Sold") = [0.0000000]
d("Sold", "Healed") = [0.1666667]
d("Sold", "Sealed") = [0.3333333]
d("Sold", "Healthy") = [0.0000000]
d("Sold", "Heard") = [0.1818182]
d("Sold", "Herded") = [0.1666667]
d("Sold", "Help") = [0.0000000]
d("Sold", "Sold") = [1.0000000]
如果你想直接从 JRuby 中使用它,你只需要添加StrikeAMatch.class
到你的$CLASSPATH
, 和你的脚本中,require 'java'
然后是java_import 'StrikeAMatch
:
require 'java'
java_import 'StrikeAMatch'
distance = StrikeAMatch.new
ARGV.each do |lhs|
ARGV.each do |rhs|
puts "d(\"#{lhs}\", \"#{rhs}\") = [#{distance.between(lhs, rhs)}]"
end
end
要调用它:
% jruby strike_a_match.rb France French
d("France", "France") = [1.0]
d("France", "French") = [0.4]
d("French", "France") = [0.4]
d("French", "French") = [1.0]