我非常喜欢这个问题,我无法抗拒对上面建议的算法进行编码。代码有点长,所以我把它放在一个单独的响应中。
它在示例中提出了这个序列。
Step 1: c d
Step 2: b c
Step 3: a b c
Step 4: a b c d
Step 5: a b c d
Step 6: a b
请注意,此算法忽略了初始设置和最终拆卸成本。它只考虑设置间的距离。这里的汉明距离是 2 + 1 + 1 + 0 + 2 = 6。这与问题中给出的顺序相同的总距离。
#include <stdio.h>
#include <stdlib.h>
// With these data types we can have up to 64k items and 64k sets of items,
// But then the table of pairs is about 20Gb!
typedef unsigned short ITEM, INDEX;
// A sku set in the problem.
struct set {
INDEX n_elts;
ITEM *elts;
};
// A pair of sku sets and associated info.
struct pair {
INDEX i, j; // Indices of sets.
ITEM dist; // Hamming distance between sets.
INDEX rank, parent; // Disjoint set union/find fields.
};
// For a given set, the adjacent ones along the path under construction.
struct adjacent {
unsigned char n; // 0, 1, or 2.
INDEX elts[2]; // Indices of n adjacent sets.
};
// Some tracing functions for fun.
void print_pair(struct pair *pairs, int i)
{
struct pair *p = pairs + i;
printf("%d:(%d,%d@%d)[%d->%d]\n", i, p->i, p->j, p->dist, p->rank, p->parent);
}
void print_adjacent(struct adjacent *adjs, int i)
{
struct adjacent *a = adjs + i;
switch (a->n) {
case 0: printf("%d:o", i); break;
case 1: printf("%d:o->%d\n", i, a->elts[0]); break;
default: printf("%d:%d<-o->%d\n", i, a->elts[0], a->elts[1]); break;
}
}
// Compute the Hamming distance between two sets. Assumes elements are sorted.
// Works a bit like merging.
ITEM hamming_distance(struct set *a, struct set *b)
{
int ia = 0, ib = 0;
ITEM d = 0;
while (ia < a->n_elts && ib < b->n_elts) {
if (a->elts[ia] < b->elts[ib]) {
++d;
++ia;
}
else if (a->elts[ia] > b->elts[ib]) {
++d;
++ib;
}
else {
++ia;
++ib;
}
}
return d + (a->n_elts - ia) + (b->n_elts - ib);
}
// Classic disjoint set find operation.
INDEX find(struct pair *pairs, INDEX x)
{
if (pairs[x].parent != x)
pairs[x].parent = find(pairs, pairs[x].parent);
return pairs[x].parent;
}
// Classic disjoint set union. Assumes x and y are canonical.
void do_union(struct pair *pairs, INDEX x, INDEX y)
{
if (x == y) return;
if (pairs[x].rank < pairs[y].rank)
pairs[x].parent = y;
else if (pairs[x].rank > pairs[y].rank)
pairs[y].parent = x;
else {
pairs[y].parent = x;
pairs[x].rank++;
}
}
// Sort predicate to sort pairs by Hamming distance.
int by_dist(const void *va, const void *vb)
{
const struct pair *a = va, *b = vb;
return a->dist < b->dist ? -1 : a->dist > b->dist ? +1 : 0;
}
// Return a plan with greedily found least Hamming distance sum.
// Just an array of indices into the given table of sets.
// TODO: Deal with calloc/malloc failure!
INDEX *make_plan(struct set *sets, INDEX n_sets)
{
// Allocate enough space for all the pairs taking care for overflow.
// This grows as the square of n_sets!
size_t n_pairs = (n_sets & 1) ? n_sets / 2 * n_sets : n_sets / 2 * (n_sets - 1);
struct pair *pairs = calloc(n_pairs, sizeof(struct pair));
// Initialize the pairs.
int ip = 0;
for (int j = 1; j < n_sets; j++) {
for (int i = 0; i < j; i++) {
struct pair *p = pairs + ip++;
p->i = i;
p->j = j;
p->dist = hamming_distance(sets + i, sets + j);
}
}
// Sort by Hamming distance.
qsort(pairs, n_pairs, sizeof pairs[0], by_dist);
// Initialize the disjoint sets.
for (int i = 0; i < n_pairs; i++) {
struct pair *p = pairs + i;
p->rank = 0;
p->parent = i;
}
// Greedily add pairs to the Hamiltonian path so long as they don't cause a non-path!
ip = 0;
struct adjacent *adjs = calloc(n_sets, sizeof(struct adjacent));
for (int i = 0; i < n_pairs; i++) {
struct pair *p = pairs + i;
struct adjacent *ai = adjs + p->i, *aj = adjs + p->j;
// Continue if we'd get a vertex with degree 3 by adding this edge.
if (ai->n == 2 || aj->n == 2) continue;
// Find (possibly) disjoint sets of pair's elements.
INDEX i_set = find(pairs, p->i);
INDEX j_set = find(pairs, p->j);
// Continue if we'd form a cycle by adding this edge.
if (i_set == j_set) continue;
// Otherwise add this edge.
do_union(pairs, i_set, j_set);
ai->elts[ai->n++] = p->j;
aj->elts[aj->n++] = p->i;
// Done after we've added enough pairs to touch all sets in a path.
if (++ip == n_sets - 1) break;
}
// Find a set with only one adjacency, the path start.
int p = -1;
for (int i = 0; i < n_sets; ++i)
if (adjs[i].n == 1) {
p = i;
break;
}
// A plan will be an ordering of sets.
INDEX *plan = malloc(n_sets * sizeof(INDEX));
// Walk along the path to get the ordering.
for (int i = 0; i < n_sets; i++) {
plan[i] = p;
struct adjacent *a = adjs + p;
// This logic figures out which adjacency takes us forward.
p = a->elts[ a->n > 1 && a->elts[1] != plan[i-1] ];
}
// Done with intermediate data structures.
free(pairs);
free(adjs);
return plan;
}
// A tiny test case. Much more testing needed!
#define ARRAY_SIZE(A) (sizeof A / sizeof A[0])
#define SET(Elts) { ARRAY_SIZE(Elts), Elts }
// Items must be in ascending order for Hamming distance calculation.
ITEM a1[] = { 'a', 'b' };
ITEM a2[] = { 'a', 'b', 'c' };
ITEM a3[] = { 'a', 'b', 'c', 'd' };
ITEM a4[] = { 'a', 'b', 'c', 'd' };
ITEM a5[] = { 'b', 'c' };
ITEM a6[] = { 'c', 'd' };
// Out of order to see how we do.
struct set sets[] = { SET(a3), SET(a6), SET(a1), SET(a4), SET(a5), SET(a2) };
int main(void)
{
int n_sets = ARRAY_SIZE(sets);
INDEX *plan = make_plan(sets, n_sets);
for (int i = 0; i < n_sets; i++) {
struct set *s = sets + plan[i];
printf("Step %d: ", i+1);
for (int j = 0; j < s->n_elts; j++) printf("%c ", (char)s->elts[j]);
printf("\n");
}
return 0;
}