也许有办法做到这一点,但这在一定程度上取决于您是否大致知道会有多少匹配(即数据总体上的“匹配程度”)。如果匹配的情况占多数,最好先假设所有数组都互相匹配,然后再逐步排除不匹配的。
无论如何,我认为您可以先对数据进行预处理。我不确定这样是否更快——这完全取决于您的数据分布——但我会先尝试类似下面的做法,然后在此基础上继续改进:
/**
 * Finds every pair of arrays that share at least one identical key/value
 * pair and do not disagree on any key they have in common.
 *
 * Arrays that share no key at all are NOT reported by this approach, because
 * candidates are only generated from shared key/value pairs.
 *
 * @param array $arrays List of associative arrays; values must be usable as
 *                      array keys (integers or strings).
 * @return array List of array($i, $j) index pairs, each reported once, in
 *               the order in which they are first discovered.
 */
function findMatchingPairs(array $arrays) {
    // Index the data: key => value => list of indices of the arrays that
    // contain exactly that key/value pair.
    $keyData = array();
    foreach ($arrays as $index => $row) {
        foreach ($row as $k => $v) {
            $keyData[$k][$v][] = $index;
        }
    }

    // Any two arrays sharing a key/value pair are a potential match.
    // Keys with only one distinct value must be considered too: several
    // arrays may share that single value. Lists holding a single index
    // simply produce no pairs in the loops below.
    $potentialMatches = array();
    $seen = array();
    foreach ($keyData as $values) {
        foreach ($values as $arrayIndices) {
            $n = count($arrayIndices);
            for ($i = 0; $i < $n; $i++) {
                for ($j = $i + 1; $j < $n; $j++) {
                    $first = $arrayIndices[$i];
                    $second = $arrayIndices[$j];
                    // Two arrays can share several key/value pairs; record
                    // each candidate pair only once.
                    if (!isset($seen[$first][$second])) {
                        $seen[$first][$second] = true;
                        $potentialMatches[] = array($first, $second);
                    }
                }
            }
        }
    }

    // A candidate is a real match when every key the two arrays have in
    // common also carries the same value in both.
    $pairs = array();
    foreach ($potentialMatches as $m) {
        $left = $arrays[$m[0]];
        $right = $arrays[$m[1]];
        if (array_intersect_key($left, $right)
            == array_intersect_assoc($left, $right)) {
            $pairs[] = $m;
        }
    }
    return $pairs;
}

$a = array(
    array(2 => 1, 4 => 2, 9 => 3),
    array(3 => 7, 4 => 5, 7 => 3),
    array(1 => 6, 4 => 5),
    array(1 => 6, 4 => 5, 7 => 5),
    array(2 => 1, 4 => 2, 9 => 3)
);
// 1 and 2 match, 2 and 3 match, 0 and 4 match
$pairs = findMatchingPairs($a);
print_r($pairs);
输出:
Array
(
[0] => Array
(
[0] => 0
[1] => 4
)
[1] => Array
(
[0] => 1
[1] => 2
)
[2] => Array
(
[0] => 2
[1] => 3
)
)
编辑
正如我在评论中所说,上面的代码不会找出彼此没有任何共同键的数组——而按照你的定义,这样的数组也算匹配。下面的代码处理了这种情况,不过我不确定它是否比嵌套循环的方案更快(而且它会占用大量内存):
/**
 * Finds every pair of arrays that do not disagree on any key they share.
 * Arrays that have no key in common are therefore reported as matching.
 *
 * Strategy: assume every array matches every other one, then eliminate the
 * combinations that hold different values for the same key.
 *
 * @param array $arrays List (integer-indexed) of associative arrays; values
 *                      must be usable as array keys (integers or strings).
 * @return array List of array($x, $y) index pairs with $x < $y.
 */
function findCompatiblePairs(array $arrays) {
    // First assume everything is a match: index => all indices.
    $indices = array_keys($arrays);
    $potentialMatches = array_fill_keys($indices, $indices);

    // Build data about each key: the indices of the arrays containing that
    // key, and the indices for each value of that key. The two are kept in
    // separate arrays so that no data value can collide with a bookkeeping
    // entry.
    $indicesByKey = array();
    $indicesByKeyValue = array();
    foreach ($arrays as $index => $row) {
        foreach ($row as $k => $v) {
            $indicesByKey[$k][] = $index;
            $indicesByKeyValue[$k][$v][] = $index;
        }
    }

    // Now go through the key data and eliminate indices that can't match.
    foreach ($indicesByKeyValue as $k => $valueLists) {
        // With a single distinct value, every array holding this key agrees
        // on it, so there is nothing to eliminate.
        if (count($valueLists) < 2) {
            continue;
        }
        $all = $indicesByKey[$k];
        foreach ($valueLists as $arrayIndices) {
            // Arrays holding this value cannot match the arrays that hold a
            // different value for the same key.
            $cantMatch = array_diff($all, $arrayIndices);
            foreach ($arrayIndices as $index) {
                $potentialMatches[$index] =
                    array_diff($potentialMatches[$index], $cantMatch);
            }
        }
    }

    // $potentialMatches still contains (x,x), and both (x,y) and (y,x);
    // keep each unordered pair once, as (smaller, larger).
    $pairs = array();
    foreach ($potentialMatches as $x => $matches) {
        foreach ($matches as $y) {
            if ($x < $y) {
                $pairs[] = array($x, $y);
            }
        }
    }
    return $pairs;
}

// New test data to cover the case I missed
$a = array(
    array(2 => 1, 4 => 2, 9 => 3),
    array(3 => 7, 4 => 5, 7 => 3),
    array(1 => 6, 4 => 5),
    array(1 => 6, 4 => 5, 7 => 5),
    array(2 => 1, 4 => 2, 9 => 3),
    array(8 => 3)
);
// 1 and 2 match, 2 and 3 match, 0 and 4 match, 5 matches all
$pairs = findCompatiblePairs($a);
print_r($pairs);
输出
Array
(
[0] => Array
(
[0] => 0
[1] => 4
)
[1] => Array
(
[0] => 0
[1] => 5
)
[2] => Array
(
[0] => 1
[1] => 2
)
[3] => Array
(
[0] => 1
[1] => 5
)
[4] => Array
(
[0] => 2
[1] => 3
)
[5] => Array
(
[0] => 2
[1] => 5
)
[6] => Array
(
[0] => 3
[1] => 5
)
[7] => Array
(
[0] => 4
[1] => 5
)
)