一种方法是从指定确定性自动机接受的具有正确字母计数的单词中随机均匀采样。该算法是自动机状态和剩余符号数量的动态程序。以下是一些带有 20 个 a 和 20 个 b 的示例输出:
abbaabbbaabbbabaaabbaaababbaaabbbabbbaaa
bbaaababababbaaabbababababbbabaaababaabb
bbababbbaaabaaabbbabaabaaabbbaababbababa
ababbbabbababbbaabababbaababaabbaaababaa
bbaaababbababbabaabbababaabababaabababba
bbaabbababbbaabbababaaabaababbbaababaaab
babaabaabbababbababbababbaababaaababbaba
aaabababaababbabbababbbaabbababaabbaaabb
babababbabaaababababababaababbbaabbaabba
bbabaabababababbabaababaababbbaabbabaaba
这是产生这些的Python。
from collections import namedtuple
from itertools import product, repeat
from random import random
"""
deterministic finite automata
delta is a dict from state-symbol pairs to states
q0 is the initial state
F is the set of accepting states
"""
DFA = namedtuple('DFA', ('delta', 'q0', 'F'))
"""accepts strings with no runs of length 4"""
noruns4 = DFA(
delta={
('0', 'a'): '1a',
('0', 'b'): '1b',
('1a', 'a'): '2a',
('1a', 'b'): '1b',
('1b', 'a'): '1a',
('1b', 'b'): '2b',
('2a', 'a'): '3a',
('2a', 'b'): '1b',
('2b', 'a'): '1a',
('2b', 'b'): '3b',
('3a', 'a'): '4',
('3a', 'b'): '1b',
('3b', 'a'): '1a',
('3b', 'b'): '4',
('4', 'a'): '4',
('4', 'b'): '4'},
q0='0',
F={'0', '1a', '1b', '2a', '2b', '3a', '3b'})
def accepts(dfa, s):
"""returns whether dfa accepts s"""
q = dfa.q0
for c in s:
q = dfa.delta[(q, c)]
return q in dfa.F
def testaccepts():
for n in range(10):
for cs in product(*repeat('ab', n)):
s = ''.join(cs)
if not accepts(noruns4, s) != ('aaaa' in s or 'bbbb' in s):
print(s)
assert False
testaccepts()
def acceptedstrcnts(dfa, syms, cnts, memo=None, q=None):
"""
counts the number of strings accepted by dfa,
subject to the constraint of having the specified number of symbols
"""
if memo is None:
memo = {}
if q is None:
q = dfa.q0
key = (q,) + tuple(cnts)
if key not in memo:
if sum(cnts) > 0:
total = 0
for (i, cnt) in enumerate(cnts):
if cnt > 0:
newcnts = list(cnts)
newcnts[i] -= 1
newq = dfa.delta[(q, syms[i])]
total += acceptedstrcnts(dfa, syms, newcnts, memo, newq)
else:
total = 1.0 if q in dfa.F else 0.0
memo[key] = total
return memo[key]
print(acceptedstrcnts(noruns4, 'ab', (125, 50)))
memo = {}
acceptedstrcnts(noruns4, 'ab', (4, 4), memo)
# 62 strings with 4 a's, 4 b's, and no runs
print(memo)
def memoget(memo, q, cnts):
return memo[(q,) + tuple(cnts)]
def samplestrcnts(dfa, syms, cnts, memo):
"""
uses the memoization dict to sample the counted words
modulo roundoff error, the sampling is uniform
"""
cnts = list(cnts)
cs = []
q = dfa.q0
while sum(cnts) > 0:
denom = memoget(memo, q, cnts)
outcome = random()
j = None
for (i, cnt) in enumerate(cnts):
if cnt > 0:
j = i # default in case roundoff bites us
newcnts = list(cnts)
newcnts[i] -= 1
newq = dfa.delta[(q, syms[i])]
numer = memoget(memo, newq, newcnts)
ratio = numer / denom
if outcome < ratio:
break
outcome -= ratio
cnts[j] -= 1
cs.append(syms[j])
q = dfa.delta[(q, syms[j])]
return ''.join(cs)
acceptedstrcnts(noruns4, 'ab', (20, 20), memo)
for k in range(10):
print(samplestrcnts(noruns4, 'ab', (20, 20), memo))