4

我的主要问题是试图找到一个合适的解决方案来自动转动它,例如:

d+c+d+f+d+c+d+f+d+c+d+f+d+c+d+f+

进入这个:

[d+c+d+f+]4

即查找彼此相邻的重复项,然后从这些重复项中制作一个较短的“循环”。到目前为止,我还没有找到合适的解决方案,我期待着回应。PS 为了避免混淆,上述示例并不是唯一需要“循环”的东西,它因文件而异。哦,这适用于 C++ 或 C# 程序,两者都可以,尽管我也愿意接受任何其他建议。此外,主要思想是所有工作都将由程序本身完成,除了文件本身之外没有用户输入。这是完整的文件,供参考,我对拉伸页面表示歉意:#0 @16 v225 y10 w250 t76

l16 $ED $EF $A9 p20,20 >ecegb>d<bgbgecgec<g >d+<b>d+f+a+>c+<a+f+a+f+d+<b>f+d+<bf+ >c <a>cegbgegec<a>ec<ae > d+c+d+f+d+c+d+f+d+c+d+f+d+c+d+f+ r1^1

/ l8 r1r1r1r1 f+<a+>f+g+cg+r4 a+c+a+g+cg+r4f+<a+>f+g+cg+r4 a+c+a+g+cg+r4f+<a+>f +g+cg+r4 a+c+a+g+cg+r4 f+<a+>f+g+cg+r4 a+c+a+g+r4g+16f16c+ a+2^g+f+g+4 f+ff+4fd+f4 d+c+d+4c+c<a+2^4 >c4d+ <g+2^4r4^ a+>c+d+4g+4a+4 r1^2^4^a+ 2^g+f+g+4 f+ff+4fd+f4 d+c+d+4c+c<a+2^4 >c4d+ <g+2^4r4^a+>c+d+4g+4a+ 4 R1 ^ 2 ^ 4 ^ r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1

#4 @22 v250 y10

l8 o3 rg+rg+rg+rg+rg+rg+rg+rg+rg+rg+rg+rg+rg+rg+rg+rg+rg+rg+rg+rg+rg+rg+rg+rg+ / r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1

#2 @4 v155 y10

l8 $ED $F8 $8F o4 r1r1r1 d+4f4f+4g+4 a+4r1^4^2 / d+4^fr2 f+4^fr2d+4^fr2 f+4^fr2d+4^fr2 f+4 ^fr2d+4^fr2 f+4^fr2 > d+4^fr2 f+4^fr2d+4^fr2 f+4^fr2 < f+4^g+r2 f+4^fr2f+4^g+r2 f+4^fr2f+4^g+r2 f+4^fr2f+4^g+r2 f+4^fr2f+4^g+r2 f+4^fr2f+4^g+r2 f+4^fr2f+ 4^g+r2 f+4^fr2f+4^g+r2 f+4^fr2 > a+4^g+r2 f+1a+4^g+r2 f+1 f+4^fr2 d+1 f + 4 ^ FR2 d + 2 ^ d + 4 ^ r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1

#3 @10 v210 y10

R1 ^ 1 O3 c8r8d8r8 c8r8c8r8c8r8c8r8c8r8c8r8c8r8c8r8c8r8c8r8c8r8 C8 @ 10d16d16 @ 21 C8 @ 10d16d16 @ 21 C8 @ 10d16d16 @ 21 / C4 @ 10d8 @ 21c8 <B8> C8 @ 10d8 @ 21c8c4 @ 10d8 @ 21c8 <B8> C8 @ 10d8 @ 21c8c4 @ 10d8 @ 21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8< b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8> c8@10d8@21c8 c4@10d8@21c8<b8> @10d16d16d16d16d16r16 c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d @21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8c4 @10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@ 10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@ 21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@ 10d8@21c8<b8>c8@10d8@21c8 c4@10d8@21c8 @10b16b16>c16c16<b16b16a16a16c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8 c4@10d8@21c8 @10b16b16>c16c16 <b16b16a16a16c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8c4@10d8@21c8<b8>c8@10d8@21c8 c4@10d8@21c8 @10b16b16>c16c16 <b16b16a16a16

#7 @16 v230 y10

l16 $ED $EF $A9 cceeggbbggeeccee <bb>d+d+f+f+a+a+f+f+d+d+<bb>d+d+ <aa>cceeggeecc<aa>cc <g+g+bb > d + d + + FFD + d <BBG + G + BB / r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1

#5 @4 v155 y10

l8 $ED $F8 $8F o4 r1r1r1r1 d+4r1^2^4 / <a+4^>cr2 c+4^cr2<a+4^>cr2 c+4^cr2<a+4^>cr2 c+ 4^cr2<a+4^>cr2 c+4^cr2 a+4^>cr2 c+4^cr2 <a+4^>cr2 c+4^c r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1 r2 f+4^fr2 d+1f+4 ^ FR2 d + 1 C + 4 ^ CR2 <A + 1> C + 4 ^ CR2 <A + 2 ^一个+ 4 ^ r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1r1

4

4 回答 4

2

您可以使用 Smith-Waterman 算法进行局部对齐,将字符串与自身进行比较。

http://en.wikipedia.org/wiki/Smith-Waterman_algorithm

编辑:要使算法适应自对齐,您需要将对角线中的值强制为零 - 也就是说,惩罚将整个字符串与自身完全对齐的简单解决方案。然后会弹出“次佳”对齐方式。这将是最长的两个匹配子串。重复同样的事情来找到越来越短的匹配子字符串。

于 2009-05-13T19:06:58.713 回答
2

不确定这是否是您正在寻找的。

我将字符串“testtesttesttest4notaduped+c+d+f+d+c+d+f+d+c+d+f+d+c+d+f+testtesttest”转换为“[test]4 4notadupe[ d+c+d+f+]4 [测试]3 "

我相信有人会想出一个更好更有效的解决方案,因为在处理完整文件时它有点慢。我期待其他答案。

        string stringValue = "testtesttesttest4notaduped+c+d+f+d+c+d+f+d+c+d+f+d+c+d+f+testtesttest";

        for(int i = 0; i < stringValue.Length; i++)
        {
            for (int k = 1; (k*2) + i <= stringValue.Length; k++)
            {
                int count = 1;

                string compare1 = stringValue.Substring(i,k);
                string compare2 = stringValue.Substring(i + k, k);

                //Count if and how many duplicates
                while (compare1 == compare2) 
                {
                    count++;
                    k += compare1.Length;
                    if (i + k + compare1.Length > stringValue.Length)
                        break;

                    compare2 = stringValue.Substring(i + k, compare1.Length);
                } 

                if (count > 1)
                {
                    //New code.  Added a space to the end to avoid [test]4 
                    //turning using an invalid number ie: [test]44.
                    string addString = "[" + compare1 + "]" + count + " ";

                    //Only add code if we are saving space
                    if (addString.Length < compare1.Length * count)
                    {
                        stringValue = stringValue.Remove(i, count * compare1.Length);
                        stringValue = stringValue.Insert(i, addString);
                        i = i + addString.Length - 1;
                    }
                    break;
                }
            }
        }
于 2009-05-13T21:19:50.523 回答
1

LZW可以提供帮助:它使用前缀字典来搜索重复模式,并将此类数据替换为对先前条目的引用。我认为根据您的需要调整它应该不难。

于 2009-05-14T07:11:14.417 回答
0

为什么不直接使用System.IO.Compression

于 2009-05-13T19:20:43.573 回答