我想使用 Megaparsec 解析一种基本的缩进语言。最初我使用的是 Parsec,我设法通过缩进正常工作,但现在我遇到了一些麻烦。
我一直在这里学习教程,这是我必须解析语言忽略缩进的代码。
module Parser where
import Data.Functor ((<$>), (<$))
import Control.Applicative (Applicative(..))
import qualified Control.Monad as M
import Control.Monad (void)
import Data.Functor.Identity
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Void
import Text.Megaparsec
import Text.Megaparsec.Char
import Text.Megaparsec.Perm
import Text.Megaparsec.Expr
import qualified Text.Megaparsec.Char.Lexer as L
import Text.Pretty.Simple
import Data.Either.Unwrap
--import Lexer
import Syntax
type Parser = Parsec Void String
lineComment :: Parser ()
lineComment = L.skipLineComment "#"
scn :: Parser ()
scn = L.space space1 lineComment empty
sc :: Parser () -- ‘sc’ stands for “space consumer”
sc = L.space (void $ takeWhile1P Nothing f) lineComment empty
where
f x = x == ' ' || x == '\t'
lexeme :: Parser a -> Parser a
lexeme = L.lexeme sc
symbol :: String -> Parser String
symbol = L.symbol sc
integer :: Parser Integer
integer = lexeme L.decimal
semi :: Parser String
semi = symbol ";"
rword :: String -> Parser ()
rword w = lexeme (string w *> notFollowedBy alphaNumChar)
rws :: [String] -- list of reserved words
rws = ["if","then","else","while","do","skip","true","false","not","and","or"]
identifier :: Parser String
identifier = (lexeme . try) (p >>= check)
where
p = (:) <$> letterChar <*> many alphaNumChar
check x = if x `elem` rws
then fail $ "keyword " ++ show x ++ " cannot be an identifier"
else return x
parens :: Parser a -> Parser a
parens = between (symbol "(") (symbol ")")
whileParser :: Parser Stmt
whileParser = between sc eof stmt
stmt :: Parser Stmt
stmt = f <$> sepBy1 stmt' semi
where
-- if there's only one stmt return it without using ‘Seq’
f l = if length l == 1 then head l else Seq l
stmt' :: Parser Stmt
stmt' = ifStmt
<|> whileStmt
<|> skipStmt
<|> assignStmt
<|> parens stmt
ifStmt :: Parser Stmt
ifStmt = do
rword "if"
cond <- bExpr
rword "then"
stmt1 <- stmt
rword "else"
stmt2 <- stmt
return (If cond stmt1 stmt2)
whileStmt :: Parser Stmt
whileStmt = do
rword "while"
cond <- bExpr
rword "do"
stmt1 <- stmt
return (While cond stmt1)
assignStmt :: Parser Stmt
assignStmt = do
var <- identifier
void (symbol ":=")
expr <- aExpr
return (Assign var expr)
skipStmt :: Parser Stmt
skipStmt = Skip <$ rword "skip"
aExpr :: Parser AExpr
aExpr = makeExprParser aTerm aOperators
bExpr :: Parser BExpr
bExpr = makeExprParser bTerm bOperators
aOperators :: [[Operator Parser AExpr]]
aOperators =
[ [Prefix (Neg <$ symbol "-") ]
, [ InfixL (ABinary Multiply <$ symbol "*")
, InfixL (ABinary Divide <$ symbol "/") ]
, [ InfixL (ABinary Add <$ symbol "+")
, InfixL (ABinary Subtract <$ symbol "-") ]
]
bOperators :: [[Operator Parser BExpr]]
bOperators =
[ [Prefix (Not <$ rword "not") ]
, [InfixL (BBinary And <$ rword "and")
, InfixL (BBinary Or <$ rword "or") ]
]
aTerm :: Parser AExpr
aTerm = parens aExpr
<|> Var <$> identifier
<|> IntConst <$> integer
bTerm :: Parser BExpr
bTerm = parens bExpr
<|> (BoolConst True <$ rword "true")
<|> (BoolConst False <$ rword "false")
<|> rExpr
rExpr :: Parser BExpr
rExpr = do
a1 <- aExpr
op <- relation
a2 <- aExpr
return (RBinary op a1 a2)
relation :: Parser RBinOp
relation = (symbol ">" *> pure Greater)
<|> (symbol "<" *> pure Less)
parsePrint :: String -> IO()
parsePrint s = do
parseTest stmt' s
运行此解析正确。
parsePrint $ unlines
[ "while (true) do if(false) then x := 5 else y := 20"
]
这是从这里的第二个教程解析缩进的代码。
{-# LANGUAGE TupleSections #-}
module Main where
import Control.Applicative (empty)
import Control.Monad (void)
import Data.Void
import Data.Char (isAlphaNum)
import Text.Megaparsec
import Text.Megaparsec.Char
import qualified Text.Megaparsec.Char.Lexer as L
type Parser = Parsec Void String
lineComment :: Parser ()
lineComment = L.skipLineComment "#"
scn :: Parser ()
scn = L.space space1 lineComment empty
sc :: Parser ()
sc = L.space (void $ takeWhile1P Nothing f) lineComment empty
where
f x = x == ' ' || x == '\t'
lexeme :: Parser a -> Parser a
lexeme = L.lexeme sc
pItem :: Parser String
pItem = lexeme (takeWhile1P Nothing f) <?> "list item"
where
f x = isAlphaNum x || x == '-'
pComplexItem :: Parser (String, [String])
pComplexItem = L.indentBlock scn p
where
p = do
header <- pItem
return (L.IndentMany Nothing (return . (header, )) pLineFold)
pLineFold :: Parser String
pLineFold = L.lineFold scn $ \sc' ->
let ps = takeWhile1P Nothing f `sepBy1` try sc'
f x = isAlphaNum x || x == '-'
in unwords <$> ps <* sc
pItemList :: Parser (String, [(String, [String])])
pItemList = L.nonIndented scn (L.indentBlock scn p)
where
p = do
header <- pItem
return (L.IndentSome Nothing (return . (header, )) pComplexItem)
parser :: Parser (String, [(String, [String])])
parser = pItemList <* eof
main :: IO ()
main = return ()
我想作为一个正确解析的例子。
parsePrint $ unlines
[ "while (true) do"
, " if(false) then x := 5 else y := 20"
]
我怎样才能正确解析缩进?还有其他地方有关于使用 Megaparsec 的教程/文档吗?