我相信以下代码可以帮助您:
from itertools import permutations
from random import randint
from pprint import pprint
def generate_genes():
    """
    Build the 16 base genes, one for every on/off assignment of A, B, C, D.

    Gene number i is the 4-bit binary form of i, most significant bit
    first, so A carries the highest bit and D the lowest.

    @return: 16 genes, each a list of four (letter, bit) tuples in
             A, B, C, D order, where bit is the int 0 or 1
    @rtype: list
    """
    letters = 'ABCD'
    genes = []
    for number in range(16):
        # '04b' zero-pads the binary representation to exactly four digits.
        bits = format(number, '04b')
        genes.append([(letter, int(bit)) for letter, bit in zip(letters, bits)])
    return genes
def all_possible_genes():
    """
    Expand every base gene into all orderings of its four (letter, bit) pairs.

    @return: one entry per gene from generate_genes(); each entry is the
             list of all permutations of that gene, every permutation
             being a tuple of (letter, bit) pairs
    @rtype: list
    """
    return [list(permutations(gene)) for gene in generate_genes()]
def gene_stringify(gene_tuple):
    """
    Render a gene as a string made of the letters whose bit is switched on.

    @type gene_tuple: tuple
    @param gene_tuple: sequence of (letter, bit) pairs; pairs with a
                       falsy bit are left out of the result
    @return: the concatenated letters of the active pairs, in input order
    @rtype: str
    """
    active_letters = []
    for pair in gene_tuple:
        if pair[1]:
            active_letters.append(str(pair[0]))
    return "".join(active_letters)
def dameraulevenshtein(seq1, seq2):
    """Calculate the Damerau-Levenshtein distance between sequences.

    This distance is the number of additions, deletions, substitutions,
    and transpositions needed to transform the first sequence into the
    second. Although generally used with strings, any sequences of
    comparable objects will work.

    Transpositions are exchanges of *consecutive* characters; all other
    operations are self-explanatory. A transposed pair is scored as one
    step taken from the row two back, i.e. this is the restricted
    ("optimal string alignment") form of the distance.

    This implementation is O(N*M) time and O(M) space, for N and M the
    lengths of the two sequences.

    >>> dameraulevenshtein('ba', 'abc')
    2
    >>> dameraulevenshtein('fee', 'deed')
    2

    It works with arbitrary sequences too:

    >>> dameraulevenshtein('abcd', ['b', 'a', 'c', 'd', 'e'])
    2

    @param seq1: source sequence (must support len() and indexing)
    @param seq2: target sequence (must support len() and indexing)
    @return: the edit distance between seq1 and seq2
    @rtype: int
    """
    # codesnippet:D0DE4716-B6E6-4161-9219-2903BF8F547F
    # Conceptually, this is based on a (len(seq1) + 1) x (len(seq2) + 1)
    # matrix. However, only the current and two previous rows are needed
    # at once, so we only store those.
    oneago = None
    # list(...) keeps the concatenation valid on Python 3, where range()
    # is a lazy object instead of a list.
    thisrow = list(range(1, len(seq2) + 1)) + [0]
    for x in range(len(seq1)):
        # Python lists wrap around for negative indices, so put the
        # leftmost column at the *end* of the list. This matches with
        # the zero-indexed strings and saves extra calculation.
        twoago, oneago, thisrow = oneago, thisrow, [0] * len(seq2) + [x + 1]
        for y in range(len(seq2)):
            delcost = oneago[y] + 1
            addcost = thisrow[y - 1] + 1
            subcost = oneago[y - 1] + (seq1[x] != seq2[y])
            thisrow[y] = min(delcost, addcost, subcost)
            # This block deals with transpositions. The x > 0 guard also
            # guarantees twoago is a real row rather than None.
            if (x > 0 and y > 0 and seq1[x] == seq2[y - 1]
                    and seq1[x - 1] == seq2[y] and seq1[x] != seq2[y]):
                thisrow[y] = min(thisrow[y], twoago[y - 2] + 1)
    # For an empty seq2 the index is -1, which is the leftmost column and
    # therefore holds len(seq1).
    return thisrow[len(seq2) - 1]
if __name__ == '__main__':
    # Demo: pick two random gene orderings, show each one (compact string
    # plus full structure), then print the edit distance between their
    # string forms.
    genes = all_possible_genes()

    # Derive the index bounds from the data instead of hard-coding 15 / 23.
    group1 = genes[randint(0, len(genes) - 1)]
    list1 = group1[randint(0, len(group1) - 1)]
    group2 = genes[randint(0, len(genes) - 1)]
    list2 = group2[randint(0, len(group2) - 1)]

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(gene_stringify(list1))
    pprint(list1)
    print(gene_stringify(list2))
    pprint(list2)
    print(dameraulevenshtein(gene_stringify(list1), gene_stringify(list2)))
致谢
Damerau-Levenshtein 算法的实现来自 Michael Homer(迈克尔·荷马)