1

我想解析以下文本:

 WHERE
 ( AND
       ApplicationGroup.REFSTR = 5
       BV_1.Year = 2009
       BV_1.MonetaryCodeId = 'Commited'
       BV_3.Year = 2009
       BV_3.MonetaryCodeId = 'Commited'
       BV_4.Year = 2009
       BV_4.MonetaryCodeId = 'Commited
 )

我从条件列表的组合器开始:

let multiConditionWhereList : Parser<WhereCondition list, unit> =
        sepEndBy1 (ws >>. whereCondition) (newline)
        <?> "where condition list"

当我交出 where 语句的条件列表时(每行都有一个 =),我得到一个回复​​,其结果中有七个 WhereConditions。状态为“正常”。但错误列表包含“预期换行”错误消息。

但是,每当我尝试解析这种用圆括号括起来的列表时,它的开头带有一个操作符,并带有以下形状的组合器:

let multiConditionWhereClause : Parser<WhereStatement, unit> =
        pstringCI "where"
        .>> spaces 
        >>. between (pchar '(') (pchar ')') 
                    ( ws  >>. whereChainOperator .>> spaces1
                      .>>. multiConditionWhereList )
        |>> (fun (chainOp, conds) -> { Operator = chainOp; 
                                       SearchConditions = conds } )

我收到状态为“错误”的回复。但是错误列表和结果都是空的。

所以我有点卡在这一点上。首先我不明白,为什么我的multiConditionWhereList中的sepByEnd1组合器会产生一个非空的错误列表并希望在末尾有一个换行符。更重要的是,当我将列表包装在 between 语句中时,我不明白为什么没有捕获该列表。

作为参考,我包括了整套规则以及导致问题的规则的调用:

#light

#r "System.Xml.Linq.dll"
#r @"..\packages\FParsec.1.0.1\lib\net40-client\FParsecCS.dll"
#r @"..\packages\FParsec.1.0.1\lib\net40-client\FParsec.dll"

module Ast =    
    open System
    open System.Xml.Linq

    type AlfabetParseError (msg: string) =
              inherit Exception (msg)

    type FindStatement = 
            { TableReferences: TableReferences;}

    and TableReferences = 
            { PrimaryTableReference: TableReferenceWithAlias; JoinTableReferences: JoinTableReference list; }

    and TableReferenceWithAlias = 
            { Name: string; Alias: string }

    and JoinTableReference = 
            { JoinType:JoinType; TableReference: TableReferenceWithAlias; JoinCondition: JoinCondition; }

    and JoinType =
            | InnerJoin
            | OuterJoin
            | LeftJoin
            | RightJoin

    and JoinCondition = 
            { LeftHandSide: FieldReference; RightHandSide: FieldReference; }

    and WhereStatement = 
            { Operator: WhereOperator; SearchConditions: WhereCondition list }

    and WhereOperator = 
            | And
            | Or
            | Equal
            | Is
            | IsNot
            | Contains
            | Like
            | NoOp
    and WhereLeftHandSide =
            | FieldReferenceLH of FieldReference

    and WhereRightHandSide =
            | FieldReferenceRH of FieldReference
            | VariableReferenceRH of VariableReference
            | LiteralRH of Literal

    and WhereCondition =
            { LeftHandSide: WhereLeftHandSide; Operator: WhereOperator; RightHandSide: WhereRightHandSide; }

    and FieldReference =
            { FieldName: Identifier; TableName: Identifier }

    and VariableReference =
            { VariableName : Identifier; }

    and Literal = 
            | Str of string
            | Int of int
            | Hex of int
            | Bin of int
            | Float of float
            | Null 

    and Identifier = 
              Identifier of string  

    and QueryXml =
            { Doc : XDocument }  

module AlfabetQueryParser =
    open Ast
    open FParsec
    open System
    open System.Xml.Linq

    module Parsers =

        (* Utilities *)
        let toJoinType (str:string) = 
            match str.ToLowerInvariant() with
            | "innerjoin" -> InnerJoin
            | "outerjoin" -> OuterJoin
            | "leftjoin"  -> LeftJoin
            | "rightjoin" -> RightJoin
            | _           -> raise <| AlfabetParseError "Invalid join type"

        let toWhereOperator (str:string) = 
            match str.ToLowerInvariant() with
            | "and"       -> And
            | "or"        -> Or
            | "="         -> Equal
            | "is"        -> Is
            | "is not"    -> IsNot
            | "contains"  -> Contains
            | "like"      -> Like
            | _           -> raise <| AlfabetParseError "Invalid where operator type"

        (* Parsers *)
        let ws : Parser<string, unit> =
            manyChars (satisfy (fun c -> c = ' '))

        let ws1 : Parser<string, unit> =
            many1Chars (satisfy (fun c -> c = ' '))

        let identifier : Parser<string, unit> = 
            many1Chars (satisfy (fun(c) -> isDigit(c) || isAsciiLetter(c) || c.Equals('_')))

        let fieldReference : Parser<FieldReference, unit> =
            identifier 
            .>> pstring "." 
            .>>. identifier
            |>> (fun (tname, fname) -> {FieldName = Identifier(fname); 
                                        TableName = Identifier(tname) })

        let variableReference : Parser<VariableReference, unit> =
            pstring ":"
            >>. identifier
            |>> (fun vname -> { VariableName = Identifier(vname) })

        let numeralOrDecimal : Parser<Literal, unit> =
            numberLiteral NumberLiteralOptions.AllowFraction "number" 
            |>> fun num -> 
                    if num.IsInteger then Int(int num.String)
                    else Float(float num.String)

        let hexNumber : Parser<Literal, unit> =    
            pstring "#x" >>. many1SatisfyL isHex "hex digit"
            |>> fun hexStr -> 
                    Hex(System.Convert.ToInt32(hexStr, 16)) 

        let binaryNumber : Parser<Literal, unit> =    
            pstring "#b" >>. many1SatisfyL (fun c -> c = '0' || c = '1') "binary digit"
            |>> fun hexStr -> 
                    Bin(System.Convert.ToInt32(hexStr, 2))

        let numberLiteral : Parser<Literal, unit> =
            choiceL [numeralOrDecimal
                     hexNumber
                     binaryNumber]
                    "number literal"

        let strEscape = 
            pchar '\\' >>. pchar '\''

        let strInnard = 
            strEscape <|> noneOf "\'"

        let strInnards = 
            manyChars strInnard

        let strLiteral =  
            between (pchar '\'') (pchar '\'') strInnards
            |>> Str

        let literal : Parser<Literal, unit> = 
                (pstringCI "null" |>> (fun str -> Null))
            <|> numberLiteral
            <|> strLiteral

        let joinCondition : Parser<JoinCondition, unit> =
            spaces .>> pstring "ON" .>> spaces
            >>. fieldReference
            .>> spaces .>> pstring "=" .>> spaces
            .>>. fieldReference
            |>> (fun(lhs, rhs) -> { LeftHandSide = lhs; RightHandSide = rhs })

        let tableReferenceWithoutAlias : Parser<TableReferenceWithAlias, unit> =
            identifier
            |>> (fun (name) -> { Name = name; Alias = ""})

        let tableReferenceWithAlias : Parser<TableReferenceWithAlias, unit> =
            identifier
            .>> spaces .>> pstringCI "as" .>> spaces 
            .>>. identifier
            |>> (fun (name, alias) -> { Name = name; Alias = alias})

        let primaryTableReference : Parser<TableReferenceWithAlias, unit> =
            attempt tableReferenceWithAlias <|> tableReferenceWithoutAlias

        let joinTableReference : Parser<JoinTableReference, unit> =
            identifier
            .>> spaces 
            .>>. (attempt tableReferenceWithAlias <|> tableReferenceWithoutAlias)
            .>> spaces
            .>>. joinCondition
            |>> (fun ((joinTypeStr, tableRef), condition) -> { JoinType = toJoinType(joinTypeStr);
                                                               TableReference = tableRef; 
                                                               JoinCondition = condition } )

        let tableReferences : Parser<TableReferences, unit> =
            primaryTableReference
            .>> spaces
            .>>. many (joinTableReference .>> spaces)
            |>> (fun (pri, joinTables) -> { PrimaryTableReference = pri; 
                                            JoinTableReferences = joinTables; } )

        let whereConditionOperator : Parser<WhereOperator, unit> =
            choice [
                pstringCI "="
              ; pstringCI "is not"
              ; pstringCI "is"
              ; pstringCI "contains"
              ; pstringCI "like"
            ]
            |>> toWhereOperator


        let whereChainOperator : Parser<WhereOperator, unit> = 
            choice [
                pstringCI "and"
            ;   pstringCI "or"
            ]
            |>> toWhereOperator

        let whereCondition : Parser<WhereCondition, unit> =

            let leftHandSide : Parser<WhereLeftHandSide, unit> =
                fieldReference |>> FieldReferenceLH

            let rightHandSide : Parser<WhereRightHandSide, unit> =
                    (attempt fieldReference |>> FieldReferenceRH)  
                <|> (attempt variableReference |>> VariableReferenceRH)
                <|> (literal |>> LiteralRH)

            leftHandSide
            .>> ws1 .>>. whereConditionOperator .>> ws1
            .>>. rightHandSide
            |>> (fun((lhs, op), rhs) -> { LeftHandSide = lhs; 
                                          Operator = op; 
                                          RightHandSide = rhs })

        let singleConditionWhereClause : Parser<WhereStatement, unit> =
            pstringCI "where" .>> spaces
            >>. whereCondition
            |>> (fun (cond) -> { Operator = NoOp;
                                 SearchConditions = [ cond ] } );

        let multiConditionChainOperator : Parser<WhereOperator, unit> =
            pstring "(" .>> spaces >>. whereChainOperator .>> spaces
            <?> "where multi-condition operator"

        let multiConditionWhereList : Parser<WhereCondition list, unit> =
            sepEndBy1 (ws >>. whereCondition) (newline)
            <?> "where condition list"

        let multiConditionWhereClause : Parser<WhereStatement, unit> =
            pstringCI "where"
            .>> spaces 
            >>. between (pchar '(') (pchar ')') 
                        ( ws  >>. whereChainOperator .>> spaces1
                          .>>. multiConditionWhereList )
            |>> (fun (chainOp, conds) -> { Operator = chainOp; 
                                           SearchConditions = conds } )

        let whereClause : Parser<WhereStatement, unit> =
            (attempt multiConditionWhereClause)
            <|> singleConditionWhereClause

        let findStatement : Parser<FindStatement, unit> =
            spaces .>> pstringCI "find" .>> spaces
            >>. tableReferences
            |>> (fun (tableRef) -> { TableReferences = tableRef; } )

        let queryXml : Parser<QueryXml, unit> = 
            pstringCI "QUERY_XML" .>> newline
            >>. manyCharsTill anyChar eof
            |>> (fun (xmlStr) -> { Doc = XDocument.Parse(xmlStr) } )

    let parse input =  
        match run Parsers.findStatement input with
        | Success (x, _, _) -> x
        | Failure (x, _, _) -> raise <|  AlfabetParseError x


open FParsec

let input = @"WHERE
            ( AND
                ApplicationGroup.REFSTR CONTAINS  :BASE
                BV_1.Year = 2009
                BV_1.MonetaryCodeId = 'Commited'
                BV_3.Year = 2009
                BV_3.MonetaryCodeId = 'Commited'
                BV_4.Year = 2009
                BV_4.MonetaryCodeId = 'Commited'
            )"

let r = run AlfabetQueryParser.Parsers.multiConditionWhereClause input
4

1 回答 1

4

FParsec 无法为您的示例生成更有用的错误消息的原因是您已经使用原语定义了wsid解析器satisfy。由于您只指定了一个谓词函数,因此 FParsec 不知道如何描述预期的输入。用户指南解释了这个问题以及如何避免它。在您的代码中,您可以使用satisfyLmany1SatisfyL作为定义。

修复wsandid解析器后,您会很快发现您的代码无法正确解析列表,因为空格解析被搞砸了。在可能的情况下,您应该始终将空格解析为尾随空格,而不是前导空格,因为这样可以避免回溯。要修复您在上面给出的输入的解析器,您可以例如替换

sepEndBy1 (ws >>. whereCondition) (newline)

sepEndBy1 (whereCondition .>> ws) (newline >>. ws)

在 的定义中multiConditionWhereList

请注意,非空错误消息列表并不一定意味着错误,因为 FParsec 通常会收集在流中当前位置应用的所有解析器的错误消息,即使解析器是“可选的”。这可能是您看到“预期换行符”的原因,因为在该位置会接受换行符。

于 2014-01-02T13:07:00.223 回答