以下是按同时计算的掩码数量排序的结果。
如果单独计算,每个掩码最多需要 7 次操作:
a01 = b0 & b1
a23 = b2 & b3
r01 = b0 | b1
r23 = b2 | b3
m0 = ~(r01 | r23) // 4 ops
m1 = (a01 | r23) ^ (r01 | a23) // 7 ops
m2 = (r01 & r23) ^ (a01 | a23) // 7 ops
m3 = (r01 & r23) & (a01 ^ a23) // 7 ops
m4 = a01 & a23 // 3 ops
这里有很多常见的子表达式,所以如果你需要一次知道任何一对掩码,你最多需要 10 次操作(少用m0
or m4
)。
但是某些对的计算可能会进一步优化:
// m2,m3 in 9 ops
t1 = r01 & r23
t2 = a01 ^ a23
m2 = t1 ^ (a23 | t2)
m3 = t1 & t2
// m1,m3 in 9 ops
t1 = r01 ^ r23
t2 = a01 ^ a23
t3 = t1 ^ t2
m1 = t1 & t3
m3 = t2 & t3
同样的方法适用于掩码三元组。在 11 次操作中,只有一个三元组 ( m1
, m2
, m3
) 可以更快地计算,使用“排序位”方法,这是最佳的。
如果您一次需要 4 或 5 个掩码,我相信,“排序位”方法会产生最佳结果。
如果我们允许更多操作(NAND),则可以进行更多优化。实际上,最后一个代码片段的最后 3 行可以替换为 2 个 NAND:
// m1,m3 in 8 ops (NAND is a single op)
t1 = r01 ^ r23
t2 = a01 ^ a23
m1 = t1 & ~t2
m3 = ~t1 & t2
并且 (m1,m2,m3) 三元组也可以使用 NAND 进行优化:
// m1,m2,m3 in 10 ops (NAND is a single op)
x01 = b0 ^ b1
x23 = b2 ^ b3
a01 = b0 & b1
a23 = b2 & b3
t1 = x01 ^ x23
t2 = a01 ^ a23
m1 = t1 & ~t2
m2 = ~t1 & (x23 ^ t2)
m3 = t1 & t2
再添加一项操作m4 = a01 & a23
以获取除m0
11 项操作之外的所有掩码。
这些结果是在详尽的搜索代码的帮助下获得的(见下文)。这段代码使用了一些简化的假设,以便能够足够快地运行。这些假设并不明显,这使得该代码不是证明结果最优性的好工具。至少单独掩码的结果是最佳的,这已由其他答案的代码证明。我的代码“证明”了掩码对和三元组结果的最优性,这意味着您很可能无法更快地做到这一点。
这是代码(C++14 或 C++11 加上二进制文字):
/* Search for minimal logical expression (using &|^ operations) for bits set
* exactly N times (in a group of 4 bits).
*
* Uses brute force approach to get one or two expressions for one or two
* values of N at once. To make it possible getting results in reasonable time
* some simplifications were made:
* - first 4 operations pre-defined: &| or ^& or ^| for 2 pairs of input values
* - number of ops limited, if no result found within limit, print "impossible"
* - no attempts to perform operation on 2 expr with the same left and the same
* right parts
* - unused nodes are not allowed (to minimize number of duplicate attempts)
* - no attempt to use "not" (except for "m0")
*
* Also these optimizations were tried (with no significant effect):
* - no more than 2 different ops on same pair of sub-expressions
* - do not apply same op on same pair of sub-expressions more than once
*
* operation set may be extended by "nand" (kNAnd option)
*/
#include <algorithm>
#include <array>
#include <iostream>
#include <bitset>
#include <thread>
#include <mutex>
#include <cassert>
using namespace std;
enum {
kMaxSize = 17,
kNTargets = 5,
kNInputs = 4,
kNil = 255,
kNAnd = 0,
};
enum Op {
OpAnd = kNInputs,
OpOr,
OpXor,
OpNAndL,
OpNAndR,
};
array<const char*, kNInputs + 3> g_op_str {
"b0", "b1", "b2", "b3",
" & ", " | ", " ^ ",
};
array<unsigned, kNTargets> g_target_masks {
0b0111111111111111, // gives correct result only after additional "not"
0b0111100000000000,
0b0000011111100000,
0b0000000000011110,
0b0000000000000001,
};
// 0111122222233334
array<unsigned, kNInputs> g_literal_vals {
0b0100011100001111,
0b0010010011010111,
0b0001001010111011,
0b0000100101111101,
};
unsigned g_targets = 0;
unsigned g_score_limit = 0;
mutex g_print_mutex;
template<typename C, typename T>
ptrdiff_t findIndex(C c, T t)
{
auto it = find(begin(c), end(c), t);
return it - begin(c);
}
struct DAGNode
{
unsigned value;
uint8_t op;
bool l_not;
bool r_not;
uint8_t left;
uint8_t right;
uint8_t use_cnt;
void clear()
{
use_cnt = 0;
}
void setLit(const uint8_t lit, const unsigned v)
{
value = v;
op = lit;
l_not = false;
r_not = false;
left = kNil;
right = kNil;
use_cnt = 0;
}
};
struct DAG
{
array<DAGNode, kMaxSize> nodes;
unsigned size;
unsigned score;
void print(ostream& out, size_t ind)
{
auto& node = nodes[ind];
if (node.op < kNInputs)
{
out << g_op_str[node.op];
}
else
{
out << '(';
if (node.l_not) out << '~';
print(out, node.left);
out << g_op_str[node.op];
if (node.r_not) out << '~';
print(out, node.right);
out << ')';
}
}
void printAll(ostream& out)
{
for (size_t i = 2 * kNInputs; i < size; ++i)
{
auto& node = nodes[i];
auto ind = findIndex(g_target_masks, node.value);
if ((1 << ind) & g_targets)
{
out << 'm' << static_cast<char>('0' + ind) << " = ";
print(out, i);
out << '\n';
}
}
}
};
bool operator < (const DAG& l, const DAG& r)
{
return l.score < r.score;
}
class Find
{
using SPA = array<uint8_t, (kMaxSize - kNInputs) * (kMaxSize - kNInputs)>;
using EDA = bitset<(kMaxSize - kNInputs) * (kMaxSize - kNInputs) * 5>;
SPA same_pair_;
EDA dup_op_;
DAG dag_;
DAG best_;
unsigned ops_;
unsigned targets_;
unsigned unused_;
class UseCnt
{
unsigned& unused_;
uint8_t& use_cnt_;
public:
UseCnt(unsigned& unused, uint8_t& use_cnt)
: unused_(unused)
, use_cnt_(use_cnt)
{
if (!use_cnt_)
--unused_;
++use_cnt_;
}
~UseCnt()
{
--use_cnt_;
if (!use_cnt_)
++unused_;
}
};
class PairLim
{
uint8_t& counter_;
public:
PairLim(SPA& spa, size_t l, size_t r)
: counter_(spa[(kMaxSize - kNInputs) * l + r])
{
++counter_;
}
bool exceeded()
{
return counter_ > 2;
}
~PairLim()
{
--counter_;
}
};
class DupLim
{
EDA& eda_;
size_t ind_;
public:
DupLim(EDA& eda, size_t l, size_t r, size_t op)
: eda_(eda)
, ind_(5 * ((kMaxSize - kNInputs) * l + r) + op - kNInputs)
{
eda_.flip(ind_);
}
bool used()
{
return !eda_.test(ind_);
}
~DupLim()
{
eda_.flip(ind_);
}
};
unsigned getPos(uint8_t l)
{
return dag_.nodes[l].value;
}
bool tryNode(uint8_t l, uint8_t r, uint8_t op)
{
//DupLim dl(dup_op_, l, r, op);
//if (dl.used())
// return false;
addNode(l, r, op);
auto node = dag_.nodes[dag_.size - 1];
const auto ind = findIndex(g_target_masks, node.value);
const auto m = (1 << ind) & targets_;
if (m)
{
++node.use_cnt;
--unused_;
if (targets_ == m)
{
best_ = dag_;
--dag_.size;
--dag_.score;
return true;
}
targets_ &= ~m;
}
search();
if (!m)
{
--unused_;
}
targets_ |= m;
--dag_.size;
--dag_.score;
return false;
}
public:
Find()
: ops_(kNInputs)
, targets_(g_targets)
, unused_(0)
{
dag_.score = 0;
dag_.size = kNInputs;
best_.score = g_score_limit;
best_.size = 0;
for (int i = 0; i < kNInputs; ++i)
dag_.nodes[i].setLit(static_cast<uint8_t>(i), g_literal_vals[i]);
fill(begin(same_pair_), end(same_pair_), 0);
}
void addNode(const uint8_t l, const uint8_t r, uint8_t op)
{
auto& node = dag_.nodes[dag_.size];
switch (op)
{
case OpAnd:
node.value = getPos(l) & getPos(r);
break;
case OpOr:
node.value = getPos(l) | getPos(r);
break;
case OpXor:
node.value = getPos(l) ^ getPos(r);
break;
case OpNAndL:
node.value = ~getPos(l) & getPos(r);
break;
case OpNAndR:
node.value = getPos(l) & ~getPos(r);
break;
default:
assert(false);
}
node.op = op;
node.l_not = false;
node.r_not = false;
node.left = l;
node.right = r;
node.use_cnt = 0;
if (op == OpNAndL)
{
node.l_not = true;
node.op = OpAnd;
}
else if (op == OpNAndR)
{
node.r_not = true;
node.op = OpAnd;
}
++dag_.size;
++dag_.score;
++unused_;
}
void search()
{
if (dag_.score >= best_.score)
return;
for (uint8_t i_r = kNTargets; i_r < dag_.size; ++i_r)
{
UseCnt uc_r(unused_, dag_.nodes[i_r].use_cnt);
if (unused_ > 2 * (best_.score - dag_.score) - 1)
continue;
for (uint8_t i_l = kNInputs; i_l < i_r; ++i_l)
{
UseCnt uc_l(unused_, dag_.nodes[i_l].use_cnt);
if (unused_ > 2 * (best_.score - dag_.score) - 2)
continue;
if (dag_.nodes[i_l].left == dag_.nodes[i_r].left &&
dag_.nodes[i_l].right == dag_.nodes[i_r].right
)
continue;
PairLim pl(same_pair_, i_l, i_r);
if (pl.exceeded())
continue;
if (tryNode(i_l, i_r, OpAnd))
return;
if (tryNode(i_l, i_r, OpOr))
return;
if (tryNode(i_l, i_r, OpXor))
return;
if (kNAnd)
{
if (tryNode(i_l, i_r, OpNAndL))
return;
if (tryNode(i_l, i_r, OpNAndR))
return;
}
}
}
}
void print(ostream& out, const char* name)
{
if (best_.score < g_score_limit)
{
out << name << " ops = " << best_.score << '\n';
best_.printAll(out);
out << '\n';
}
else
{
out << name << " impossible\n\n";
}
}
void process(ostream& out, const char* name)
{
search();
lock_guard<mutex> lk(g_print_mutex);
print(out, name);
}
};
unsigned readTargets(char* str)
{
unsigned num = 0;
for (; *str; ++str)
{
if (*str >= '0' && *str <= '4')
{
g_targets |= 1 << (*str - '0');
++num;
}
}
return num;
}
void usage()
{
cerr << "Usage: bitcnt [0-4]*\n"
"example: bitcnt 23 (to find targets m2,m3)\n";
exit(1);
}
int main(int argc, char **argv) {
if (argc > 1)
g_score_limit = 6 + 2 * readTargets(argv[1]);
else
usage();
// set score_limit to 10 for m1,m2,m3 (with nand), time ≈ 1h
array<Find, 3> finders;
finders[0].addNode(0, 1, OpAnd);
finders[0].addNode(0, 1, OpOr);
finders[0].addNode(2, 3, OpAnd);
finders[0].addNode(2, 3, OpOr);
finders[1].addNode(0, 1, OpAnd);
finders[1].addNode(0, 1, OpXor);
finders[1].addNode(2, 3, OpAnd);
finders[1].addNode(2, 3, OpXor);
finders[2].addNode(0, 1, OpXor);
finders[2].addNode(0, 1, OpOr);
finders[2].addNode(2, 3, OpXor);
finders[2].addNode(2, 3, OpOr);
auto t0 = thread([&finders]{finders[0].process(cout, "&|");});
auto t1 = thread([&finders]{finders[1].process(cout, "&^");});
auto t2 = thread([&finders]{finders[2].process(cout, "^|");});
t0.join(); t1.join(); t2.join();
}
这个答案的第一个版本仍然是正确的,但最近的结果已经过时了:
m2
在 10 个操作中:
x1 = b1 ^ b2;
x2 = b3 ^ b4;
m2 = (x1 & x2) | (((b1 & b2) ^ (b3 & b4)) & ~(x1 | x2));
m1
在 8 个操作中:
m1 = (b1 ^ b2 ^ b3 ^ b4) & ~((b1 & b2) | (b3 & b4));