@Alex 的回答很好,但是只有 256 个值(索引)的数组应该更好
import qualified Data.ByteString.Lazy as L
import qualified Data.Array.Unboxed as A
import qualified Data.ByteString as B
import Data.Int
import Data.Word
fq :: L.ByteString -> A.UArray Word8 Int64
fq = A.accumArray (+) 0 (0, 255) . map (\c -> (c, 1)) . concat . map B.unpack . L.toChunks
main = L.getContents >>= print . fq
@alex 代码占用(对于我的示例文件)24.81 段,使用数组占用 7.77 段。
更新:
虽然 Snoyman 解决方案更好,但改进unpack
可能避免
fq :: L.ByteString -> A.UArray Word8 Int64
fq = A.accumArray (+) 0 (0, 255) . toCounterC . L.toChunks
where toCounterC [] = []
toCounterC (x:xs) = toCounter x (B.length x) xs
toCounter _ 0 xs = toCounterC xs
toCounter x i xs = (B.index x i', 1): toCounter x i' xs
where i' = i - 1
约 50% 的加速。
更新:
使用IOVector
作为 Snoyman 是作为Conduit
版本(确实快一点,但这是一个原始代码,更好地使用Conduit
)
import Data.Int
import Data.Word
import Control.Monad.IO.Class
import qualified Data.ByteString.Lazy as L
import qualified Data.Array.Unboxed as A
import qualified Data.ByteString as B
import qualified Data.Vector.Unboxed.Mutable as V
fq :: L.ByteString -> IO (V.IOVector Int64)
fq xs =
do
v <- V.replicate 256 0 :: IO (V.IOVector Int64)
g v $ L.toChunks xs
return v
where g v = toCounterC
where toCounterC [] = return ()
toCounterC (x:xs) = toCounter x (B.length x) xs
toCounter _ 0 xs = toCounterC xs
toCounter x i xs = do
let i' = i - 1
w = fromIntegral $ B.index x i'
c <- V.read v w
V.write v w (c + 1)
toCounter x i' xs
main = do
v <- L.getContents >>= fq
mapM_ (\i -> V.read v i >>= liftIO . putStr . (++", ") . show) [0..255]