我编写了下面的代码,用来列出一个“列表的列表”中各个子序列的出现频率(结果包含子序列本身,以及出现该子序列的各个列表的索引)。有没有人能给些建议,让它更简洁和/或更高效?
样本输出:
*Main> combFreq [[1,2,3,5,7,8],[2,3,5,6,7],[3,5,7,9],[1,2,3,7,9],[3,5,7,10]]
[([3,5],[0,1,2,4]),([2,3],[0,1,3]),([3,5,7],[0,2,4]),([5,7],[0,2,4]),([2,3,5],[0,1]),([1,2],[0,3]),([1,2,3],[0,3]),([7,9],[2,3])]
import Data.List
import Data.Function (on)
--[[1,2,3,5,7,8],[2,3,5,6,7],[3,5,7,9],[1,2,3,7,9],[3,5,7,10]]
-- | Merge the second components of two pairs under the first pair's key.
--
-- The result pairs the first component of @x@ with the concatenation of
-- both second components, sorted ascending and with duplicates removed.
-- The first component of @y@ is ignored.
tupleCat :: Ord b => (a, [b]) -> (c, [b]) -> (a, [b])
tupleCat x y = (fst x, sortedUnique (snd x ++ snd y))
  where
    -- Sorting first puts equal elements next to each other, so keeping one
    -- representative per group removes duplicates in O(n log n) rather than
    -- the O(n^2) of 'nub'. 'group' never yields an empty group, so the
    -- pattern below always matches.
    sortedUnique zs = [z | z : _ <- group (sort zs)]
-- | Look a key up in an association list.
--
-- Returns a singleton list holding the value of the first entry whose key
-- equals the given one, or the empty list when no entry matches.
isInResult :: Eq k => k -> [(k, v)] -> [v]
isInResult key table = maybe [] (\found -> [found]) (lookup key table)
-- | Pairwise intersection of the contiguous runs of the input lists.
--
-- Each input list is expanded into all of its non-empty contiguous runs.
-- Then, for every pair of input positions @(i, j)@ with @i < j@ (0-based),
-- each run of list @i@ that also appears among the runs of list @j@ is
-- emitted together with @(i, j)@. Output is ordered by @i@, then @j@, then
-- by the run's position in list @i@'s expansion; duplicates coming from
-- list @i@ are kept, as 'intersect' does.
sInt :: Eq a => [[a]] -> [([a], (Int, Int))]
sInt xss =
  [ (run, (i, j))
  | (i, runsI) : later <- tails (zip [0 ..] (map contiguousRuns xss))
  , (j, runsJ) <- later
  , run <- runsI `intersect` runsJ
  ]
  where
    -- Every prefix of every suffix, with the empty prefixes dropped.
    contiguousRuns ys = [r | suffix <- tails ys, r <- inits suffix, not (null r)]
-- | Fold @(key, (i, j))@ observations into an association list that maps
-- each key to the indices collected for it.
--
-- The first argument is the list of observations, processed left to right.
-- The second is the association list built so far (pass @[]@ to start).
--
-- For each observation:
--
-- * if the key is already present, its entry is removed and a merged entry
--   (old indices combined with @[i, j]@ via 'tupleCat') is put at the front;
-- * otherwise a new entry @(key, [i, j])@ is put at the front.
concatResults :: (Eq a, Ord i) => [(a, (i, i))] -> [(a, [i])] -> [(a, [i])]
concatResults observations initial = foldl absorb initial observations
  where
    absorb acc (key, (i, j)) =
      case lookup key acc of
        -- 'lookup' returns the first entry for the key and 'delete' removes
        -- the first equal element, so both refer to the same entry.
        Just seen ->
          let existing = (key, seen)
          in tupleCat existing fresh : delete existing acc
        Nothing -> fresh : acc
      where
        fresh = (key, [i, j])
-- | List the contiguous subsequences (two or more elements long) shared by
-- at least two of the input lists, each paired with the 0-based positions
-- of the lists it was found in.
--
-- Entries are ordered by descending number of positions. Among entries with
-- the same count, the order is the reverse of the order produced by
-- 'concatResults' (stable ascending sort followed by 'reverse').
combFreq :: Eq a => [[a]] -> [([a], [Int])]
combFreq xs =
  reverse
    . sortBy (compare `on` (length . snd))
    -- Filtering first gives the same output, because 'sortBy' is stable,
    -- and leaves the sort fewer entries to handle.
    . filter (hasAtLeastTwo . fst)
    $ concatResults (sInt xs) []
  where
    -- Checks only the first two cons cells instead of computing the length.
    hasAtLeastTwo (_ : _ : _) = True
    hasAtLeastTwo _ = False