这是我从你的帖子中理解的。我做了一个 python 程序,它使用循环密钥生成加密的异或流,并尝试应用汉明字符串距离归一化方法来找到最佳的潜在循环密钥大小。我不会将事物转换为base64,而是直接应用字符串距离而不是二进制距离。
#!/usr/bin/python
import sys
from itertools import cycle
def xor_file_with_cycling_strkey(filelocation,outfile,key):
print filelocation
f=open(filelocation,'r')
f2=open(outfile,'w')
lines=[]
text=f.read()
if text != '':
for c,k in zip(text,cycle(key)):
r=chr(ord(c)^ord(k))
f2.write(r)
f2.close()
f.close()
# not used here, see compute_distance_char based on same idea.
def compute_distance(str1,str2,keysize):
count=0
print '%s %s' % (str1,str2)
str1=str1.replace("\n", "")
str2=str2.replace("\n", "")
keysize=str(keysize*8)
sbin1=format(int(str1,16),'0'+keysize+'b')
sbin2=format(int(str2,16),'0'+keysize+'b')
return hamming_distance_str(sbin1,sbin2)
#do preferer hamming_distance_bin which quicker.
def compute_distance_char(str1,str2,keysize):
count=0
str1=str1.replace("\n", "")
str2=str2.replace("\n", "")
keysize=str(keysize*8)
sbin1=''
sbin2=''
for c in str1:
sbin1=sbin1 + format(ord(c),'0'+keysize+'b')
for c in str2:
sbin2=sbin2 + format(ord(c),'0'+keysize+'b')
return hamming_distance_str(sbin1,sbin2)
def hamming_distance_str(str1,str2):
count=0
for c1,c2 in zip(str1, str2):
if c1!=c2:
count+=1
return count
def hamming_distance_bin(str1,str2):
count=0
for c1,c2 in zip(str1, str2):
if c1!=c2:
# quick hamming distance, counting number of differing bits.
s=ord(c1)^ord(c2)
# count number of bits sets using Wegner algorithm
while s !=0:
s&=(s-1);
count+=1
return count
def keysize_dist(filelocation):
potential_keysize=0
min_dist=40.0
f=open(filelocation,'r')
lines=[]
for line in f.readlines():
line=line.strip('\n')
lines.append(line)
lines=''.join(lines).strip('\n')
normalized=[]
for keysize in range(2,40):
# should first create base16 entries for that one , then don't use it : count_bin1=compute_distance(lines[0:keysize*2],lines[keysize*2:keysize*4],keysize)
# proof that both functions compute same value :
count_bin1=compute_distance_char(lines[0:keysize*2],lines[keysize*2:keysize*4],keysize)
count_bin2=hamming_distance_bin(lines[0:keysize*2],lines[keysize*2:keysize*4])
if ( count_bin1 != count_bin2 ):
print 'Discrepency between compute_distance_char->%i and hamming_distance_bin->%i' % (count_bin1,count_bin2)
count=hamming_distance_str(lines[0:keysize*2],lines[keysize*2:keysize*4])
normalized_distance=float(count)/keysize
print '%s %f' % (keysize,normalized_distance)
if ( normalized_distance < min_dist ):
potential_keysize=keysize
min_dist=normalized_distance
# we are more interested in keysize corresponding to minimal distance, tha n to minimal distance itself.
return potential_keysize,min_dist
def main(args=sys.argv):
if ( len(args) < 2 ):
print 'Please enter cleartext origin file to be ciphered then checked an optionaly a key string ( max length 40 )'
return 1
if ( len(args) > 2):
key=args[2]
else:
# on purpose default to key with a KEYSIZE char length 5.
key='12345'
xor_file_with_cycling_strkey(args[1],args[1]+'.ciphered',key)
xor_file_with_cycling_strkey(args[1]+'.ciphered',args[1] + '.cleartext',key)
# raw non base64 encoded.
print keysize_dist(args[1] + '.ciphered')
if __name__ == "__main__":
main()
使用该代码,您可以获得完全解决问题所需的所有输入。
./hamming_detect_xor_cycle.py 明文 123456789ABCDE ... (14, 1.7857142857142858)
它不能正确检测所有大小,但我认为这是一种统计效应,并且取决于本身可以具有循环特性的明文。正如您的主题所说:使用更多块可以产生更好的结果。