3

给定一个种子字符串,我想找到它的邻居最多在 2 个位置上不同。生成字符串所涉及的所有数字只有四个(即0、1、2、3)。这是我的意思的例子:

# In this example, 'first' column
# are neighbors with only 1 position differ.
# The rest of the columns are 2 positions differ

Seed = 000
100 110 120 130 101 102 103
200 210 220 230 201 202 203
300 310 320 330 301 302 303
010 011 012  013
020 021 022  023
030 031 032  033
001  
002  
003

Seed = 001
101 111 121 131 100 102 103   
201 211 221 231 200 202 203      
301 311 321 331 300 302 303      
011 010 012 013
021 020 022 023
031 030 032 033               
000
003
002     

Hence given a tag of length L
we will have 3*L + 9L(L-1)/2   neighbors  

但是为什么我的这段代码无法正确生成呢?特别是当种子字符串不是“000”时。

其他方法也受到欢迎,尤其是速度改进。因为我们将处理数百万个长度为 34 到 36 的种子标签。

#include <iostream>
#include <vector>
#include <fstream>
#include <sstream>
using namespace std;

string ConvertInt2String(int IntVal) {
    std::string S;
    std::stringstream out;
    out << IntVal;
    S = out.str();

    return S;
}

string Vec2Str (vector <int> NTg) {

    string StTg = "";
    for (unsigned i = 0; i < NTg.size(); i++) {
         StTg += ConvertInt2String(NTg[i]);
    }
    return StTg;
}

template <typename T> void  prn_vec(const std::vector < T >&arg, string sep="")
{
    for (unsigned n = 0; n < arg.size(); n++) {
        cout << arg[n] << sep;
    }
    return;
}

vector <int> neighbors(vector<int>& arg, int posNo, int baseNo) {
    // pass base position and return neighbors

    vector <int> transfVec;
    transfVec = arg;

    //modified according to strager's first post
    transfVec[posNo % arg.size()] = baseNo;

    return transfVec;

}


int main () {

    vector <int> numTag;
    numTag.push_back(0);
    numTag.push_back(0);
    numTag.push_back(1); // If "000" this code works, but not 001 or others


    // Note that in actual practice numTag can be greater than 3

     int TagLen = static_cast<int>(numTag.size());

     for ( int p=0; p< TagLen  ; p++ ) {

         // First loop is to generate tags 1 position differ
         for ( int b=1; b<=3 ; b++ ) {

             int bval = b;
             if (numTag[p] == b) {
                 bval = 0;
             }

             vector <int> nbnumTag = neighbors(numTag, p, bval);
             string SnbnumTag = Vec2Str(nbnumTag);

             cout << SnbnumTag;
             cout << "\n";


             // Second loop for tags in 2 position differ 

             for (int l=p+1; l < TagLen; l++) {

                 for (int  c=1; c<=3; c++) {

                     int cval = c;

                     if (nbnumTag[l] == c) {
                         cval = c;
                     }
                     vector <int> nbnumTag2 = neighbors(nbnumTag, l, cval);
                     string SnbnumTag2 = Vec2Str(nbnumTag2);

                     cout << "\t" << SnbnumTag2;
                     cout << "\n";

                 }
             }


         }
     }

    return 0;
}
4

8 回答 8

4

这会做吗?它枚举可能的字符串树,修剪所有与原始字符串>2 的差异。

void walk(char* s, int i, int ndiff){
  char c = s[i];
  if (ndiff > 2) return;
  if (c == '\0'){
    if (ndiff > 0) print(s);
  }
  else {
    s[i] = '0'; walk(s, i+1, (s[i]==c ? ndiff : ndiff+1);
    s[i] = '1'; walk(s, i+1, (s[i]==c ? ndiff : ndiff+1);
    s[i] = '2'; walk(s, i+1, (s[i]==c ? ndiff : ndiff+1);
    s[i] = '3'; walk(s, i+1, (s[i]==c ? ndiff : ndiff+1);
    s[i] = c;
  }
}

char seed[] = "000";
main(){
  walk(seed, 0, 0);
}
于 2009-03-28T02:20:56.890 回答
2

这是一种适用于任意数量的字符和长度的字符串的方法:

string base = "000";
char values[] = {'0', '1', '2', '3' };

for (int i = 0; i < base.length(); ++i)
{
   for (int j = 0; j < countof(values); ++j)
   {
      if (base[i] != values[j])
      {
          string copy = base;
          copy[i] = values[j];
          cout << copy << endl;

          for (int k = i+1; k < base.length(); ++k)
          {
              for (int l = 0; l < countof(values); ++l)
              {
                   if (copy[k] != values[l])
                   {
                       string copy2 = copy;
                       copy[k] = values[l];
                       cout << copy2 << endl;
                   }
              }
          }
      }
   }
}
于 2009-03-25T03:07:59.000 回答
1

这应该等效于在 4 个符号字母表上生成汉明距离为 2 内的所有字符串。我已经看到了它的算法,但我现在找不到它们。也许这可以作为正确方向的指针。

于 2009-03-25T02:59:17.650 回答
1

您的问题 [编辑:原始问题(参见问题的先前版本) ] 是在您的内部循环中,您只分配了“下一个”元素。一个快速的解决方法是将写入包装在neighbors

vector <int> neighbors(const vector<int>& arg, int posNo, int baseNo) {
    // pass base position and return neighbors

    vector <int> transfVec = arg

    transfVec[posNo % arg.size()] = baseNo;

    return transfVec;

}

此修复仅在您的数组中有两个或三个项目时有效。如果你想要更多,你需要重写你的算法,因为它根本不处理长度大于三的情况。(它不应该,甚至。你使用的算法太严格了。)

于 2009-03-25T03:08:49.373 回答
1

这两个如果是:

 if (numTag[p] == b) {
     bval = 0;
 }

 if (nbnumTag[l] == c) {
     cval = c;
 }

应该有continue.


这两个循环应该从 0 开始:

for ( int b=1; b<=3 ; b++ ) {
for (int  c=1; c<=3; c++) {

// i.e.

for ( int b=0; b<=3 ; b++ ) {
for (int  c=0; c<=3; c++) {
于 2009-03-28T02:05:13.420 回答
1

看起来 strager 已经确定了主要问题:循环条件。你的字母表是 0,1,2,3,所以你应该遍历整个范围。0 不是特殊情况,因为您的代码会尝试处理它。特殊情况是当字母值等于键中的值时跳过迭代,这是 strager 建议的 continue 完成的。

以下是我的算法版本。它对循环结构有一些替代想法,并且通过就地修改密钥来避免复制密钥。请注意,您还可以通过更改MIN_VALUEMAX_VALUE常量来更改字母表的大小。

这是“001”案例的输出:

101 111 121 131 102 103 100
201 211 221 231 202 203 200
301 311 321 331 302 303 300
011 012 013 010
021 022 023 020
031 032 033 030
002
003
000

这是代码:

#include <iostream>
#include <vector>
#include <string>
#include <sstream>

using namespace std;

const int MIN_VALUE = 0;
const int MAX_VALUE = 3;

int increment(int& ch)
{
    if (ch == MAX_VALUE)
        ch = MIN_VALUE;
    else
        ++ch;
    return ch;
}

string stringKey(const vector<int>& key)
{
    ostringstream sout;
    for (int i = 0; i < key.size(); ++i) 
        sout << key[i];
    return sout.str();
}

int main()
{
    vector<int> key;
    key.push_back(0);
    key.push_back(0);
    key.push_back(1);

    for (int outerKeyPos = 0;  outerKeyPos < key.size(); ++outerKeyPos)
    {
        int outerOriginal = key[outerKeyPos];
        while (increment(key[outerKeyPos]) != outerOriginal)
        {
            cout << stringKey(key);
            for (int innerKeyPos = outerKeyPos + 1; innerKeyPos < key.size(); ++innerKeyPos)
            {
                int innerOriginal = key[innerKeyPos];
                while (increment(key[innerKeyPos]) != innerOriginal)
                {
                    cout << " " << stringKey(key);
                }
            }
            cout << endl;
        }
    }
}
于 2009-03-28T06:08:53.037 回答
1

我试图纠正你的算法,尽可能接近原来的算法:

 int TagLen = static_cast<int>(numTag.size());

 for ( int p=0; p< TagLen  ; p++ ) {
     // First loop is to generate tags 1 position differ
     for ( int b=0; b<=3 ; b++ ) { // Loop over all 4 elements

         int bval = b;
         if (numTag[p] == b) {
             continue; // This is the seed vector, ignore it
         }

         vector <int> nbnumTag = neighbors(numTag, p, bval);
         string SnbnumTag = Vec2Str(nbnumTag);

         cout << SnbnumTag;
         cout << "\n";

         // Second loop for tags in 2 position differ 
         for (int l=p+1; l < TagLen; l++) {

             for (int  c=0; c<=3; c++) {

                 int cval = c;

                 if (nbnumTag[l] == c) { // Loop over all 4 elements
                     continue; // This is nbnumTag, ignore it
                 }
                 vector <int> nbnumTag2 = neighbors(nbnumTag, l, cval);
                 string SnbnumTag2 = Vec2Str(nbnumTag2);

                 cout << "\t" << SnbnumTag2;
                 cout << "\n";
             }
         }
     }
 }

The problem is that you don't iterate over all 4 possible values (0,1,2,3), but you skip 0 for some reason. The way I am doing it is to iterate over all of them and ignore (by using a continue) the vector that is the same with the seed or the 1-point different tag computed at phase 1.

Having said that, I believe that better algorithms than yours are proposed and it would be better to consider one of them.

于 2009-04-02T08:59:14.990 回答
0

这是我丑陋的,hacky的解决方案:

#include <iostream>
#include <vector>

using std::cout;
using std::endl;
using std::vector;

struct tri
{
    tri(int a, int b, int c)
    {
        switch (a)
        {
            case 0:
                m[0] = 0;
                m[1] = b;
                m[2] = c;
                break;
            case 1:
                m[0] = b;
                m[1] = 0;
                m[2] = c;
                break;
            case 2:
                m[0] = b;
                m[1] = c;
                m[2] = 0;
                break;
        }
    }
    int m[3];
};

int main()
{
    vector<tri> v;
    for (int i = 0; i < 3; i++)
        for (int j = 0; j < 4; j++)
            for (int k = 0; k < 4; k++)
            {
                v.push_back(tri(i,j,k));
            }

    vector<tri>::iterator it;
    for (it = v.begin(); it != v.end(); ++it)
    {
        cout << (*it).m[0];
        cout << (*it).m[1];
        cout << (*it).m[2];
        cout << endl;
    }
}
于 2009-03-25T02:49:10.607 回答