2

我正在尝试使用parsec编写以下解析器:

manyLength
  :: forall s u m a.
     Monad m
  => ParsecT s u m a -> ParsecT s u m Int
manyLength p = go 0
  where
    go :: Int -> ParsecT s u m Int
    go !i = (p *> go (i + 1)) <|> pure i

这就像many函数一样,但不是返回[a],而是返回成功的次数Parser a

这行得通,但我似乎无法让它在恒定的堆空间中运行。这是有道理的,因为递归调用go不在尾部调用位置。

如果 parsec 将构造函数导出到ParsecT,则可以以manyLengthCPS 的形式重写。这与函数非常相似manyAccum

manyLengthCPS :: forall s u m a. ParsecT s u m a -> ParsecT s u m Int
manyLengthCPS p = ParsecT f
  where
    f
      :: forall b.
         State s u
      -> (Int -> State s u -> ParseError -> m b) -- consumed ok
      -> (ParseError -> m b)                     -- consumed err
      -> (Int -> State s u -> ParseError -> m b) -- empty ok
      -> (ParseError -> m b)                     -- empty err
      -> m b
    f s cok cerr eok _ =
      let walk :: Int -> a -> State s u -> ParseError -> m b
          walk !i _ s' _ =
            unParser p s'
              (walk $ i + 1)            -- consumed-ok
              cerr                      -- consumed-err
              manyLengthCPSErr          -- empty-ok
              (\e -> cok (i + 1) s' e)  -- empty-err
      in unParser p s (walk 0) cerr manyLengthCPSErr (\e -> eok 0 s e)
    {-# INLINE f #-}

manyLengthCPSErr :: Monad m => m a
manyLengthCPSErr =
  fail "manyLengthCPS can't be used on parser that accepts empty input"

manyLengthCPS函数确实在恒定堆空间中运行。

这是ParsecT为了完整性的构造函数:

newtype ParsecT s u m a = ParsecT
  { unParser
      :: forall b .
         State s u
      -> (a -> State s u -> ParseError -> m b) -- consumed ok
      -> (ParseError -> m b)                   -- consumed err
      -> (a -> State s u -> ParseError -> m b) -- empty ok
      -> (ParseError -> m b)                   -- empty err
      -> m b
  }

我还尝试manyLengthCPS使用低级函数直接变成非 CPSmkPT函数:

manyLengthLowLevel
  :: forall s u m a.
     Monad m
  => ParsecT s u m a -> ParsecT s u m Int
manyLengthLowLevel p = mkPT f
  where
    f :: State s u -> m (Consumed (m (Reply s u Int)))
    f parseState = do
      consumed <- runParsecT p parseState
      case consumed of
        Empty mReply -> do
          reply <- mReply
          case reply of
            Ok _ _ _ -> manyLengthErr
            Error parseErr -> pure . Empty . pure $ Ok 0 parseState parseErr
        Consumed mReply -> do
          reply <- mReply
          case reply of
            Ok a newState parseErr -> walk 0 a newState parseErr
            Error parseErr -> pure . Consumed . pure $ Error parseErr
      where
        walk
          :: Int
          -> a
          -> State s u
          -> ParseError
          -> m (Consumed (m (Reply s u Int)))
        walk !i _ parseState' _ = do
          consumed <- runParsecT p parseState'
          case consumed of
            Empty mReply -> do
              reply <- mReply
              case reply of
                Ok _ _ _ -> manyLengthErr
                Error parseErr ->
                  pure . Consumed . pure $ Ok (i + 1) parseState' parseErr
            Consumed mReply -> do
              reply <- mReply
              case reply of
                Ok a newState parseErr -> walk (i + 1) a newState parseErr
                Error parseErr -> pure . Consumed . pure $ Error parseErr

manyLengthErr :: Monad m => m a
manyLengthErr =
  fail "manyLengthLowLevel can't be used on parser that accepts empty input"

就像manyLength,manyLengthLowLevel不在恒定堆空间中运行。


是否可以编写manyLength使其在恒定堆空间中运行,即使没有以 CPS 样式编写它?如果不是,为什么不呢?是否有一些基本原因可以在 CPS 样式中但在非 CPS 样式中是可能的?

4

1 回答 1

4

这在恒定堆空间中运行。思路是先尝试p,并明确对其成功的结果进行案例分析,以决定是否运行go,从而go最终处于尾调用位置。

manyLength
  :: Monad m
  => ParsecT s u m a -> ParsecT s u m Int
manyLength p = go 0
  where
    go :: Int -> ParsecT s u m Int
    go !i = do
      success <- (p *> pure True) <|> pure False
      if success then go (i+1) else pure i
于 2017-03-29T12:52:55.893 回答