这就是我到目前为止所得到的。我只是错过了不应该花费时间的步骤,例如比较哈希数组和打开文件进行读取。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace RecursiveHashing
{
static class Utilities
{
// used for circular arrays. If my circular array is of size 5 and it's
// current position is 2 if I shift 3 units to the left I shouls be in index
// 4 of the array.
public static int Shift(this int number, int shift, int divisor)
{
var tempa = (number + shift) % divisor;
if (tempa < 0)
tempa = divisor + tempa;
return tempa;
}
}
class Program
{
const int CHUNCK_SIZE = 4; // split the files in chuncks of 4 bytes
/*
* formula that I will use to compute hash
*
* formula = sum(chunck) * (a[c]+1)*(a[c-1]+1)*(a[c-2]+1)*(-1^a[c])
*
* where:
* sum(chunk) = sum of current chunck
* a[c] = current byte
* a[c-1] = last byte
* a[c-2] = last last byte
* -1^a[c] = eather -1 or +1
*
* this formula is efficient because I can get the sum of any current index by keeping trak of the overal sum
* thus this algorithm should be of order n
*/
static void Main(string[] args)
{
Part1(); // Missing implementation to open file for reading
Part2();
}
// fist part compute hashes on first file
static void Part1()
{
// pertend file b reads those bytes
byte[] FileB = new byte[]{2,3,5,8,2,0,1,0,0,0,1,2,4,5,6,7,8,2,3,4,5,6,7,8,11,};
// create an array where to store the chashes
// index 0 will use a fast hash algorithm. index 1 will use a more secure hashing algorithm
Int64[,] hashes = new Int64[(FileB.Length / CHUNCK_SIZE) + 10, 2];
// used to track on what index of the file we are at
int counter = 0;
byte[] current = new byte[CHUNCK_SIZE + 1]; // circual array needed to remember the last few bytes
UInt64[] sum = new UInt64[CHUNCK_SIZE + 1]; // circual array needed to remember the last sums
int index = 0; // position where in circular array
int numberOfHashes = 0; // number of hashes created so far
while (counter < FileB.Length)
{
int i = 0;
for (; i < CHUNCK_SIZE; i++)
{
if (counter == 0)
{
sum[index] = FileB[counter];
}
else
{
sum[index] = FileB[counter] + sum[index.Shift(-1, CHUNCK_SIZE + 1)];
}
current[index] = FileB[counter];
counter++;
if (counter % CHUNCK_SIZE == 0 || counter == FileB.Length)
{
// get the sum of the last chunk
var a = (sum[index] - sum[index.Shift(1, CHUNCK_SIZE + 1)]);
Int64 tempHash = (Int64)a;
// conpute my hash function
tempHash = tempHash * ((Int64)current[index] + 1)
* ((Int64)current[index.Shift(-1, CHUNCK_SIZE + 1)] + 1)
* ((Int64)current[index.Shift(-2, CHUNCK_SIZE + 1)] + 1)
* (Int64)(Math.Pow(-1, current[index]));
// add the hashes to the array
hashes[numberOfHashes, 0] = tempHash;
numberOfHashes++;
hashes[numberOfHashes, 1] = -1;// later store a stronger hash function
numberOfHashes++;
// MISSING IMPLEMENTATION TO STORE A SECOND STRONGER HASH FUNCTION
if (counter == FileB.Length)
break;
}
index++;
index = index % (CHUNCK_SIZE + 1); // if index is out of bounds in circular array place it at position 0
}
}
}
static void Part2()
{
// simulate file read of a similar file
byte[] FileB = new byte[]{1,3,5,8,2,0,1,0,0,0,1,2,4,5,6,7,8,2,3,4,5,6,7,8,11};
// place where we will place all matching hashes
Int64[,] hashes = new Int64[(FileB.Length / CHUNCK_SIZE) + 10, 2];
int counter = 0;
byte[] current = new byte[CHUNCK_SIZE + 1]; // circual array
UInt64[] sum = new UInt64[CHUNCK_SIZE + 1]; // circual array
int index = 0; // position where in circular array
while (counter < FileB.Length)
{
int i = 0;
for (; i < CHUNCK_SIZE; i++)
{
if (counter == 0)
{
sum[index] = FileB[counter];
}
else
{
sum[index] = FileB[counter] + sum[index.Shift(-1, CHUNCK_SIZE + 1)];
}
current[index] = FileB[counter];
counter++;
// here we compute the hash every time and we are missing implementation to
// check if hash is contained by the other file
if (counter >= CHUNCK_SIZE)
{
var a = (sum[index] - sum[index.Shift(1, CHUNCK_SIZE + 1)]);
Int64 tempHash = (Int64)a;
tempHash = tempHash * ((Int64)current[index] + 1)
* ((Int64)current[index.Shift(-1, CHUNCK_SIZE + 1)] + 1)
* ((Int64)current[index.Shift(-2, CHUNCK_SIZE + 1)] + 1)
* (Int64)(Math.Pow(-1, current[index]));
if (counter == FileB.Length)
break;
}
index++;
index = index % (CHUNCK_SIZE + 1);
}
}
}
}
}
使用相同算法在表中表示的相同文件
hashes
bytes sum Ac A[c-1] A[c-2] -1^Ac sum * (Ac+1) * (A[c-1]+1) * (A[c-2]+1)
2 2
3 5
5 10 5 3 2 -1
8 18 8 5 3 1 3888
2 20 2 8 5 1
0 20 0 2 8 1
1 21 1 0 2 -1
0 21 0 1 0 1 6
0 21 0 0 1 1
0 21 0 0 0 1
1 22 1 0 0 -1
2 24 2 1 0 1 18
4 28 4 2 1 1
5 33 5 4 2 -1
6 39 6 5 4 1
7 46 7 6 5 -1 -7392
8 54 8 7 6 1
2 56 2 8 7 1
3 59 3 2 8 -1
4 63 4 3 2 1 1020
5 68 5 4 3 -1
6 74 6 5 4 1
7 81 7 6 5 -1
8 89 8 7 6 1 13104
11 100 11 8 7 -1 -27648
file b
rolling hashes
bytes 0 Ac A[c-1] A[c-2] -1^Ac sum * (Ac+1) * (A[c-1]+1) * (A[c-2]+1)
1 1
3 4
5 9 5 3 1 -1
8 17 8 5 3 1 3672
2 19 2 8 5 1 2916
0 19 0 2 8 1 405
1 20 1 0 2 -1 -66
0 20 0 1 0 1 6
0 20 0 0 1 1 2
0 20 0 0 0 1 1
1 21 1 0 0 -1 -2
2 23 2 1 0 1 18
4 27 4 2 1 1 210
5 32 5 4 2 -1 -1080
6 38 6 5 4 1 3570
7 45 7 6 5 -1 -7392
8 53 8 7 6 1 13104
2 55 2 8 7 1 4968
3 58 3 2 8 -1 -2160
4 62 4 3 2 1 1020
5 67 5 4 3 -1 -1680
6 73 6 5 4 1 3780
7 80 7 6 5 -1 -7392
8 88 8 7 6 1 13104
11 99 11 8 7 -1 -27648