2

我目前正在开发一种编程语言,并使用 PEG.js 进行解析。这是我的语法:

// First rule of the grammar shown here (PEG.js starts parsing at the first
// rule unless told otherwise). Skips surrounding whitespace, line terminators
// and comments via __ and returns the Program node unchanged.
Start
  = __ program:Program __ { return program; }

// ----- A.1 Lexical Grammar -----

// Any single input character.
SourceCharacter
  = .

// One horizontal whitespace character: tab, vertical tab, form feed, space,
// no-break space (U+00A0) or U+FEFF. Line terminators are matched separately.
WhiteSpace "whitespace"
  = "\t"
  / "\v"
  / "\f"
  / " "
  / "\u00A0"
  / "\uFEFF"

// A single line-terminator character; used in negative lookaheads below.
LineTerminator
  = [\n\r\u2028\u2029]

// One complete end-of-line. "\r\n" is listed before "\r" so that a CRLF pair
// is consumed as one line ending rather than as a lone "\r".
LineTerminatorSequence "end of line"
  = "\n"
  / "\r\n"
  / "\r"
  / "\u2028"
  / "\u2029"

// Either comment form.
Comment "comment"
  = MultiLineComment
  / SingleLineComment

// "/*" up to the first "*/"; may span several lines.
MultiLineComment
  = "/*" (!"*/" SourceCharacter)* "*/"

// Same as MultiLineComment, but the body may not contain a line terminator,
// so the comment always stays on one line.
MultiLineCommentNoLineTerminator
  = "/*" (!("*/" / LineTerminator) SourceCharacter)* "*/"

// "//" up to, but not including, the end of the line.
SingleLineComment
  = "//" (!LineTerminator SourceCharacter)*

// An identifier name that is not one of the reserved words.
Identifier
  = !ReservedWord id:IdentifierName { return id; }

// Raw identifier: one start character followed by any number of part
// characters. Produces { type: "Identifier", name: <matched text> }.
IdentifierName "identifier"
  = IdentifierStart IdentifierPart* {
      return { type: "Identifier", name: text() };
    }

// Characters allowed at the start of an identifier.
IdentifierStart
  = UnicodeLetter / "_"

// Characters allowed after the first one: everything IdentifierStart accepts,
// digits, and the zero-width non-joiner / joiner (U+200C, U+200D).
IdentifierPart
  = IdentifierStart / UnicodeDigit / "\u200C" / "\u200D"

// Despite the name, only ASCII letters are accepted.
UnicodeLetter
  = [a-zA-Z]

// Despite the name, only ASCII digits are accepted.
UnicodeDigit
  = [0-9]

// Every word that Identifier refuses to accept (see the !ReservedWord
// predicate in Identifier): keywords, future reserved words and the
// null / boolean literal spellings.
ReservedWord
  = Keyword
  / FutureReservedWord
  / NullLiteral
  / BooleanLiteral

// Keywords of the language. Every *Token rule ends with !IdentifierPart, so a
// shorter keyword never matches the beginning of a longer word; the order of
// the alternatives therefore does not affect which keyword is recognised.
Keyword
  = BreakToken
  / CaseToken
  / CatchToken
  / ContinueToken
  / DebuggerToken
  / DefaultToken
  / DeleteToken
  / DoToken
  / ElseToken
  / FinallyToken
  / ForToken
  / FunctionToken
  / IfToken
  / InstanceofToken
  / InToken
  / NewToken
  / ReturnToken
  / SwitchToken
  / ThisToken
  / ThrowToken
  / TryToken
  / TypeofToken
  / VarToken
  / VoidToken
  / WhileToken
  / WithToken
  / GlobalToken
  / ModulusToken
  / QuotientToken
  / ANDToken
  / NOTToken
  / ORToken
  / EndWhileToken
  / ToToken
  / NextToken
  / UntilToken
  / EndIfToken
  / ElseIfToken
  / ThenToken
  / EndSwitchToken
  / EndFunctionToken
  / EndProcedureToken
  / ProcedureToken
  / ArrayToken

// Words that are reserved although no rule shown in this grammar uses them.
FutureReservedWord
  = ClassToken
  / ConstToken
  / EnumToken
  / ExportToken
  / ExtendsToken
  / ImportToken
  / SuperToken

// Any literal value. Every literal node has the shape
// { type: "Literal", value, valType }.
Literal
  = NullLiteral
  / BooleanLiteral
  / NumericLiteral
  / StringLiteral

// The null value (spelled by NullToken); value is JavaScript null.
NullLiteral
  = NullToken { return { type: "Literal", value: null, valType: "null" }; }

// The two boolean constants; value is a JavaScript boolean.
BooleanLiteral
  = TrueToken  { return { type: "Literal", value: true, valType: "bool"  }; }
  / FalseToken { return { type: "Literal", value: false, valType: "bool" }; }

// A decimal literal that is not immediately followed by another digit (this
// rejects, for example, a "0" directly followed by more digits).
// The "!DecimalDigit" predicate is not part of the official grammar, it comes
// from text in section 7.8.3.
// NOTE(review): this comment previously described the predicate as
// "!(IdentifierStart / DecimalDigit)". The code only checks !DecimalDigit, so
// a number directly followed by a letter (e.g. `1abc`) is still accepted as a
// number here - confirm whether that is intended.
NumericLiteral "number"
  = literal:DecimalLiteral !DecimalDigit {
      return literal;
    }

// Decimal number in one of three spellings, tried in this order:
//   1. integer part, ".", optional fraction digits (e.g. "3.14", "3.") -> valType "float"
//   2. ".", at least one fraction digit (e.g. ".5")                    -> valType "float"
//   3. integer only (e.g. "42")                                        -> valType "int"
// The value is always obtained with parseFloat on the matched text, including
// for the "int" case.
DecimalLiteral
  = DecimalIntegerLiteral "." DecimalDigit* {
      return { type: "Literal", value: parseFloat(text()), valType: "float" };
    }
  / "." DecimalDigit+ {
      return { type: "Literal", value: parseFloat(text()), valType: "float" };
    }
  / DecimalIntegerLiteral {
      return { type: "Literal", value: parseFloat(text()), valType: "int" };
    }

// Either a lone "0" or a non-zero digit followed by more digits.
DecimalIntegerLiteral
  = "0"
  / NonZeroDigit DecimalDigit*

// One decimal digit.
DecimalDigit
  = [0-9]

// One decimal digit other than zero.
NonZeroDigit
  = [1-9]

// Exponent suffix such as "e10" or "E-3".
// NOTE(review): ExponentPart is not referenced by DecimalLiteral or by any
// other rule shown in this grammar, so exponent notation is not parsed.
ExponentPart
  = ExponentIndicator SignedInteger

// "e" or "E" (the trailing i makes the match case-insensitive).
ExponentIndicator
  = "e"i

// Optional sign followed by at least one digit.
SignedInteger
  = [+-]? DecimalDigit+

// A string delimited by double or single quotes. The value is the
// concatenation of the decoded characters; valType is always "string".
StringLiteral "string"
  = '"' chars:DoubleStringCharacter* '"' {
      return { type: "Literal", value: chars.join(""), valType: "string" };
    }
  / "'" chars:SingleStringCharacter* "'" {
      return { type: "Literal", value: chars.join(""), valType: "string" };
    }

// One character inside a double-quoted string: any character except the
// closing quote, a backslash or a line terminator; or a backslash escape;
// or a line continuation.
DoubleStringCharacter
  = !('"' / "\\" / LineTerminator) SourceCharacter { return text(); }
  / "\\" sequence:EscapeSequence { return sequence; }
  / LineContinuation

// Same as DoubleStringCharacter, for single-quoted strings.
SingleStringCharacter
  = !("'" / "\\" / LineTerminator) SourceCharacter { return text(); }
  / "\\" sequence:EscapeSequence { return sequence; }
  / LineContinuation

// Backslash followed by an end-of-line: contributes nothing to the value.
LineContinuation
  = "\\" LineTerminatorSequence { return ""; }

// What may follow a backslash: a character escape, or "0" not followed by a
// digit (the NUL character).
// NOTE(review): "x", "u" and the digits are excluded from NonEscapeCharacter
// (see EscapeCharacter) and no hex/unicode escape rule is shown here, so
// "\x..", "\u.." and "\1".."\9" match no alternative and make the string fail
// to parse.
EscapeSequence
  = CharacterEscapeSequence
  / "0" !DecimalDigit { return "\0"; }

// A recognised single-character escape, or any other character that is
// allowed to stand for itself after a backslash.
CharacterEscapeSequence
  = SingleEscapeCharacter
  / NonEscapeCharacter

// The quote characters and the backslash stand for themselves (no action);
// b, f, n, r, t, v are mapped to the corresponding control character.
SingleEscapeCharacter
  = "'"
  / '"'
  / "\\"
  / "b"  { return "\b"; }
  / "f"  { return "\f"; }
  / "n"  { return "\n"; }
  / "r"  { return "\r"; }
  / "t"  { return "\t"; }
  / "v"  { return "\v"; }

// Any character that is neither an EscapeCharacter nor a line terminator;
// after a backslash it simply yields itself.
NonEscapeCharacter
  = !(EscapeCharacter / LineTerminator) SourceCharacter { return text(); }

// Characters that NonEscapeCharacter must not accept.
EscapeCharacter
  = SingleEscapeCharacter
  / DecimalDigit
  / "x"
  / "u"

// Keyword tokens. Each one is the literal keyword followed by !IdentifierPart,
// so the keyword is not recognised when it is only the beginning of a longer
// word (e.g. "in" at the start of "index").
// BreakToken, ContinueToken and ReturnToken return the keyword text because
// IndividualKeyword stores it in the AST; the other tokens have no action.
// NullToken is spelled "none". MOD, DIV, AND, OR and NOT are upper-case.
// NOTE(review): GetToken and SetToken are not listed in Keyword or
// FutureReservedWord in the grammar shown here.
BreakToken      = body:"break" !IdentifierPart {return body}
CaseToken       = "case"       !IdentifierPart
CatchToken      = "catch"      !IdentifierPart
ClassToken      = "class"      !IdentifierPart
ConstToken      = "const"      !IdentifierPart
ContinueToken = body:"continue"!IdentifierPart {return body}
DebuggerToken   = "debugger"   !IdentifierPart
DefaultToken    = "default"    !IdentifierPart
DeleteToken     = "delete"     !IdentifierPart
DoToken         = "do"         !IdentifierPart
ElseIfToken     = "elseif"     !IdentifierPart
ElseToken       = "else"       !IdentifierPart
EnumToken       = "enum"       !IdentifierPart
ExportToken     = "export"     !IdentifierPart
ExtendsToken    = "extends"    !IdentifierPart
FalseToken      = "false"      !IdentifierPart
FinallyToken    = "finally"    !IdentifierPart
ForToken        = "for"        !IdentifierPart
FunctionToken   = "function"   !IdentifierPart
GetToken        = "get"        !IdentifierPart
IfToken         = "if"         !IdentifierPart
ImportToken     = "import"     !IdentifierPart
InstanceofToken = "instanceof" !IdentifierPart
InToken         = "in"         !IdentifierPart
NewToken        = "new"        !IdentifierPart
NullToken       = "none"       !IdentifierPart
ReturnToken     = body:"return"     !IdentifierPart {return body}
SetToken        = "set"        !IdentifierPart
SuperToken      = "super"      !IdentifierPart
SwitchToken     = "switch"     !IdentifierPart
ThisToken       = "this"       !IdentifierPart
ThrowToken      = "throw"      !IdentifierPart
TrueToken       = "true"       !IdentifierPart
TryToken        = "try"        !IdentifierPart
TypeofToken     = "typeof"     !IdentifierPart
VarToken        = "var"        !IdentifierPart
VoidToken       = "void"       !IdentifierPart
WhileToken      = "while"      !IdentifierPart
WithToken       = "with"       !IdentifierPart
GlobalToken     = "global"     !IdentifierPart
ModulusToken    = "MOD"        !IdentifierPart
QuotientToken   = "DIV"        !IdentifierPart
ANDToken        = "AND"        !IdentifierPart
ORToken         = "OR"         !IdentifierPart
NOTToken        = "NOT"        !IdentifierPart
EndWhileToken   = "endwhile"   !IdentifierPart
ToToken         = "to"         !IdentifierPart
NextToken       = "next"       !IdentifierPart
UntilToken      = "until"      !IdentifierPart
EndIfToken      = "endif"      !IdentifierPart
ThenToken       = "then"       !IdentifierPart
EndSwitchToken  = "endswitch"  !IdentifierPart
EndFunctionToken= "endfunction" !IdentifierPart
ProcedureToken  = "procedure"  !IdentifierPart
EndProcedureToken= "endprocedure"  !IdentifierPart
ArrayToken       = "array"         !IdentifierPart


// Skipped
//
// Three flavours of ignorable input:
//   ___  mandatory (one or more), same line only: whitespace or a block
//        comment without a line terminator. Used where two words must be
//        separated, e.g. after a keyword.
//   __   optional (zero or more), may cross lines: whitespace, line
//        terminators and both comment forms.
//   _    optional (zero or more), same line only.

___
  = (WhiteSpace / /*LineTerminatorSequence / Comment*/ MultiLineCommentNoLineTerminator)+
__
  = (WhiteSpace / LineTerminatorSequence / Comment)*

_
  = (WhiteSpace / MultiLineCommentNoLineTerminator)*

// Root AST node: { type: "Program", body: [statement, ...] }.
Program
  = __ statements:StatementList __ {
      return { type: "Program", body: statements };
    }

// Zero or more statements; yields an array of statement nodes.
StatementList
  = (Statement)*

// One statement, with any surrounding whitespace, line terminators and
// comments skipped (__ on both sides). The alternatives are an ordered choice:
// the first one that matches wins, so assignments are tried before calls and
// member expressions, which can start with the same identifier.
Statement
  = __ body:(VariableAssignment
  / GlobalAssignment
  / IterativeStatement
  / IndividualKeyword
  / IfBlock
  / SwitchBlock
  / FunctionCallMember
  / MemberExpression
  / FunctionCallNoMember
  / FunctionDefinition
  / ArrayDeclaration) __
  {
  return body
  }

// Any of the three loop forms.
IterativeStatement
  = WhileStatement / ForStatement / UntilStatement

// Arithmetic expressions. Precedence, loosest to tightest:
//   additive ("+", "-")  <  multiplicative ("*", "/", "MOD", "DIV")  <  exponential ("^")
// Every binary node has the shape
//   { type: "MathematicalExpression", operator, left, right }.
MathematicalExpression = additive

// "+" and "-" are left-associative: `a - b - c` must become (a - b) - c.
// A right-recursive rule groups it as a - (b - c), which gives the wrong
// result for "-", so the operand chain is collected and folded from the left.
// With no operator present the single operand is returned unchanged.
additive
  = head:multiplicative
    tail:(_ op:("+" / "-") _ operand:multiplicative { return { operator: op, right: operand }; })*
    {
      return tail.reduce(function(left, step) {
        return { type: "MathematicalExpression", operator: step.operator, left: left, right: step.right };
      }, head);
    }

// "*", "/", "MOD" and "DIV" are left-associative as well:
// `a / b / c` must become (a / b) / c.
multiplicative
  = head:exponential
    tail:(_ op:("*" / "/" / "MOD" / "DIV") _ operand:exponential { return { operator: op, right: operand }; })*
    {
      return tail.reduce(function(left, step) {
        return { type: "MathematicalExpression", operator: step.operator, left: left, right: step.right };
      }, head);
    }

// "^" is right-associative (`a ^ b ^ c` is a ^ (b ^ c)), so right recursion is
// the correct shape here.
exponential
  = left:primary _ atag:"^" _ right:exponential {
      return { type: "MathematicalExpression", operator: atag, left: left, right: right };
    }
  / primary

// An operand: a plain value, or a parenthesised arithmetic expression.
// Same-line whitespace is allowed just inside the parentheses.
primary
  = DirectValueNoEq
  / "(" _ inner:additive _ ")" { return inner; }

// Boolean expressions. Levels, loosest to tightest:
//   operative (AND / OR)  <  negative (NOT)  <  comparative (== != >= > <= <)
// Binary nodes are { type: "LogicalExpression", operator, left, right };
// NOT nodes have no "left" property.
LogicalExpression = operative

// AND and OR share one precedence level and, because the rule recurses on the
// right, group to the right: `a AND b OR c` parses as a AND (b OR c).
operative = left:negative _ atag:("AND" / "OR") _ right:operative { return {type: "LogicalExpression", operator: atag, left:left, right:right}; } / negative

// Prefix NOT; applies to the whole comparison that follows it.
negative = atag:("NOT") _ right:negative { return {type: "LogicalExpression", operator: atag, right:right}; } / comparative

// Comparison operators. ">=" and "<=" are listed before ">" and "<" so the
// two-character operators are not cut short by the one-character ones.
comparative = left:primaryLogic _ atag:("==" / "!=" / ">=" / ">" / "<=" / "<") _ right:comparative { return {type: "LogicalExpression", operator: atag, left:left, right:right}; } / primaryLogic

// An operand: any value (including arithmetic), or a parenthesised boolean
// expression. No whitespace is accepted directly inside the parentheses.
primaryLogic = (DirectValue) / "(" operative:operative ")" { return operative; }


// Any value-producing expression.
// NOTE(review): MathematicalExpression already bottoms out in DirectValueNoEq
// (via additive -> ... -> primary), so the second alternative looks like it can
// never succeed where the first one failed.
DirectValue
 = MathematicalExpression
 / DirectValueNoEq

// A value without arithmetic operators. Ordered so that the longer forms
// (call on a member, member access, plain call) are tried before a bare
// identifier or literal.
DirectValueNoEq
 = FunctionCallMember
 / MemberExpression
 / FunctionCallNoMember
 / Identifier
 / Literal

// The values allowed as the head of a MemberExpression: no member access.
DirectValueNoMember
 = FunctionCallNoMember
 / Identifier
 / Literal

// One entry of a parameter list in a function definition: either an
// assignment (parameter with a value) or a bare identifier.
AllowedDefArg
  = VariableAssignment / Identifier

// Comma-separated parameter list of a function definition; may be empty.
// Yields an array of AllowedDefArg nodes.
FuncArgumentList
  = first:AllowedDefArg? rest:FuncArgument* {
      return first === null ? rest : [first].concat(rest);
    }

// A "," followed by one further parameter.
FuncArgument
  = _ "," _ param:AllowedDefArg _ { return param; }
// Comma-separated list of values, used for call arguments and for the
// contents of "[ ... ]"; may be empty. Yields an array of value nodes.
ArgumentList
  = first:DirectValue? rest:Argument* {
      return first === null ? rest : [first].concat(rest);
    }

// A "," followed by one further value.
Argument
  = _ "," _ value:DirectValue _ { return value; }

// <target> = <value>, where the target is a member expression or identifier.
VariableAssignment
  = target:(MemberExpression / Identifier) _ "=" _ value:DirectValue {
      return { type: "VariableAssignment", left: target, right: value };
    }

// global <identifier> = <value>
GlobalAssignment
  = GlobalToken ___ target:Identifier _ "=" _ value:DirectValue {
      return { type: "GlobalAssignment", left: target, right: value };
    }

// if <condition> then
//   <statements>
// (elseif ... | else ... endif | endif)
//
// "alternative" is the ElseIfBlock / ElseBlock node that follows, or null
// when the block is closed directly by endif.
// The __ before the alternative lets the continuation keyword be indented or
// preceded by blank lines even when the body is empty; a non-empty body
// already skips that whitespace through its last Statement.
IfBlock
  = IfToken ___ condition:LogicalExpression ___ ThenToken _
    LineTerminatorSequence
    body:StatementList
    __ alternative:(ElseIfBlock / ElseBlock)
    {
      return {
        type: "IfBlock",
        condition: condition,
        body: body,
        alternative: alternative
      };
    }

// elseif <condition> then ... ; chains to a further elseif / else / endif in
// the same way as IfBlock.
ElseIfBlock
  = ElseIfToken ___ condition:LogicalExpression ___ ThenToken _
    LineTerminatorSequence
    body:StatementList
    __ alternative:(ElseIfBlock / ElseBlock)
    {
      return {
        type: "ElseIfBlock",
        condition: condition,
        body: body,
        alternative: alternative
      };
    }

// Final part of an if chain: either "else <statements> endif", or a bare
// "endif", which yields null (no else branch).
ElseBlock
  = ElseToken _
    LineTerminatorSequence
    body:StatementList
    __ EndIfToken
    {
      return {
        type: "ElseBlock",
        body: body
      };
    }
  / EndIfToken
    {
      return null;
    }

// switch <value>:
//   case <value>:
//     <statements>
//   ...
//   default:
//     <statements>
// endswitch
//
// The cases form a linked chain through "attachedCase"; the chain ends with a
// DefaultBlock node, or with null when endswitch follows directly.
// The __ before each following case / default / endswitch allows that keyword
// to be indented or preceded by blank lines. Without it the first "case" had
// to start in the first column of the line after the switch header, and so
// did any keyword following an empty body.
SwitchBlock
  = SwitchToken ___ value:DirectValue _ ":" _
    LineTerminatorSequence
    __ attachedCase:(CaseBlock / DefaultBlock)
    {
      return {
        type: "SwitchBlock",
        value: value,
        attachedCase: attachedCase
      };
    }

// case <value>: followed by its statements and the rest of the chain.
CaseBlock
  = CaseToken ___ value:DirectValue _ ":" _
    LineTerminatorSequence
    body:StatementList
    __ attachedCase:(CaseBlock / DefaultBlock)
    {
      return {
        type: "CaseBlock",
        value: value,
        body: body,
        attachedCase: attachedCase
      };
    }

// End of the chain: either "default: <statements> endswitch", or a bare
// "endswitch", which yields null (no default branch).
DefaultBlock
  = DefaultToken _ ":" _
    LineTerminatorSequence
    body:StatementList
    __ EndSwitchToken
    {
      return {
        type: "DefaultBlock",
        body: body
      };
    }
  / EndSwitchToken
    {
      return null;
    }

// The __ placed before the closing keyword of each loop lets that keyword be
// indented or preceded by blank lines even when the loop body is empty; a
// non-empty body already skips that whitespace through its last Statement.

// while <condition>
//   <statements>
// endwhile
WhileStatement
  = WhileToken ___ condition:LogicalExpression _
    LineTerminatorSequence
    body:StatementList
    __ EndWhileToken
    {
      return {
        type: "WhileStatement",
        condition: condition,
        body: body
      };
    }

// do
//   <statements>
// until <condition>
UntilStatement
  = DoToken _
    LineTerminatorSequence
    body:StatementList
    __ UntilToken ___ condition:LogicalExpression
    {
      return {
        type: "UntilStatement",
        condition: condition,
        body: body
      };
    }

// for <assignment> to <end value>
//   <statements>
// next <identifier>
//
// "iden" is the identifier written after next; the grammar does not check
// that it matches the variable assigned in "init".
ForStatement
  = ForToken ___ init:VariableAssignment ___ ToToken ___ end:DirectValue _
    LineTerminatorSequence
    body:StatementList
    __ NextToken ___ iden:Identifier
    {
      return {
        type: "ForStatement",
        init: init,
        end: end,
        body: body,
        iden: iden
      };
    }

/*MemberFunctionCall
  = callee:("subString") _ "(" _ arg:ArgumentList _ ")"
  {
  return {
  type: "MemberFunctionCall",
  callee: callee,
  arg: arg
  }
  }

BuiltInMemberProperty
  = property:("length")
  {
  return {
  type: "BuiltInMemberProperty",
  property: property
  }
  }*/

// function <name>(<parameters>)
//   <statements>
// endfunction
//
// Yields { type: "FunctionDefinition", name, arg, body }, where "arg" is the
// array produced by FuncArgumentList.
// The __ before endfunction lets the closing keyword be indented or preceded
// by blank lines even when the body is empty.
FunctionDefinition
  = FunctionToken ___ name:Identifier _ "(" _ arg:FuncArgumentList _ ")" _
    LineTerminatorSequence
    body:StatementList
    __ EndFunctionToken
    {
      return {
        type: "FunctionDefinition",
        name: name,
        arg: arg,
        body: body
      };
    }


// array <name>[<values>]
// Yields { type: "ArrayDeclaration", name, arg }, where "arg" is the array
// produced by ArgumentList for the bracket contents.
ArrayDeclaration
  = ArrayToken ___ arrayName:Identifier _ "[" _ items:ArgumentList _ "]" {
      return { type: "ArrayDeclaration", name: arrayName, arg: items };
    }

// Call whose callee is a member expression or a plain identifier.
// NOTE(review): no rule shown in this grammar references FunctionCall; the
// two specialised variants below are used instead.
FunctionCall
  = callee:(MemberExpression / Identifier) _ "(" _ arg:ArgumentList _ ")"
  {
  return {
  type: "FunctionCall",
  callee: callee,
  arg: arg
  }
  }

// Call whose callee is a plain identifier: name(args).
// Also used for method calls after "." inside MemberExpression.
FunctionCallNoMember
  = callee:(Identifier) _ "(" _ arg:ArgumentList _ ")"
  {
  return {
  type: "FunctionCall",
  callee: callee,
  arg: arg
  }
  }

// Call whose callee is a whole member expression, e.g. a[1](args).
// Nothing can follow the call: the result is not fed back into
// MemberExpression.
FunctionCallMember
  = callee:(MemberExpression) _ "(" _ arg:ArgumentList _ ")"
  {
  return {
  type: "FunctionCall",
  callee: callee,
  arg: arg
  }
  }

// Stand-alone keyword statements: "break", "continue", or "return <value>".
// "keyword" is the keyword text returned by the corresponding token rule.
// A return statement must carry a value; "return" on its own matches neither
// alternative.
IndividualKeyword
  = keyword:(BreakToken / ContinueToken)
  {
  return {
  type: "IndividualKeyword",
  keyword: keyword
  }
  }
  / keyword:(ReturnToken) ___ value:DirectValue
  {
  return {
  type: "IndividualKeyword",
  keyword: keyword,
  value: value
  }
  }

// Member access chain: a head value followed by one or more accessors.
//   [ ... ]      computed access; "property" is the ArgumentList array
//   . name       static access; "property" is an Identifier
//   . name(...)  method call;   "property" is a FunctionCall node
// The accessors are folded from the left into nested
// { type: "MemberExpression", object, property, computed } nodes.
// There is no accessor for a bare "( ... )", so the result of a computed
// access cannot be called and then accessed further inside this rule.
MemberExpression
  = head:(
  DirectValueNoMember
  )
  tail:(
        __ "[" __ property:ArgumentList __ "]" {
          return { property: property, computed: true };
        }
      / __ "." __ property:(/*MemberFunctionCall /*/ FunctionCallNoMember / Identifier) {
          return { property: property, computed: false };
        }
    )+
    {
      // Left fold: each accessor wraps everything parsed before it.
      return tail.reduce(function(result, element) {
        return {
          type: "MemberExpression",
          object: result,
          property: element.property,
          computed: element.computed
        };
      }, head);
    }

有谁知道为什么我的成员表达式(MemberExpression)解析会出现下面这种奇怪的行为?

one.two.three()工作得很好。

one().two.three也可以正常工作。

one.two().three也有效。

one[2].three.four() 也同样有效。

但是 one = two[3]().four 无法解析,解析器报错如下:

Line 1, column 15: Expected "*", "+", "-", "/", "/*", "DIV", "MOD", "^", "array", "break", "continue", "do", "false", "for", "function", "global", "if", "none", "return", "switch", "true", "while", comment, end of input, end of line, identifier, number, string, or whitespace but "." found.

有任何想法吗?它会以这种方式表现似乎有点奇怪。

4

1 回答 1

1

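You don't allow "anonymous" function calls, i.e. calling the result of an expression such as two[3] directly with (). Inside the tail of your MemberExpression, a function call is only accepted after a "." and has to be prefixed with an Identifier: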

  tail:(
        __ "[" __ property:ArgumentList __ "]" {
          return { property: property, computed: true };
        }
      / __ "." __ property:(/*MemberFunctionCall /*/ FunctionCallNoMember / Identifier) {
          return { property: property, computed: false };
        }
    )+

Here:

// A call is only recognised when an Identifier callee comes directly before
// the "(".
FunctionCallNoMember
  = callee:(Identifier) _ "(" _ arg:ArgumentList _ ")"
  {
  return {
  type: "FunctionCall",
  callee: callee,
  arg: arg
  }
  }

If you want to allow "anonymous" function calls, then you'll need to add an additional alternation for that. Perhaps something along these lines:

Please keep in mind that I haven't fully tested this solution, so it's very possible that it will cause wrinkles in other parts of your parser. I'm only showing where the problem lies and one possible way of addressing it.

// Member access chain with an added accessor for a bare call "( ... )".
//   [ ... ]      computed access; "property" is the ArgumentList array
//   ( ... )      call of the value parsed so far; "property" is
//                { type: "FunctionCall", arg } with no callee, computed: true
//   . name       static access; "property" is an Identifier or a FunctionCall
// The accessors are folded from the left into nested MemberExpression nodes.
MemberExpression
  = head:(
  DirectValueNoMember
  )
  tail:(
        __ "[" __ property:ArgumentList __ "]" {
          return { property: property, computed: true };
        }
      /
        // New alternative: a call that is not prefixed by an Identifier.
        __ "(" _ arg:ArgumentList _ ")"
        {
          return {
            property: {
              type: "FunctionCall",
              arg: arg
            },
            computed: true
          }
        }
      / __ "." __ property:(/*MemberFunctionCall /*/ FunctionCallNoMember / Identifier) {
          return { property: property, computed: false };
        }
    )+
    {
      // Left fold: each accessor wraps everything parsed before it.
      return tail.reduce(function(result, element) {
        return {
          type: "MemberExpression",
          object: result,
          property: element.property,
          computed: element.computed
        };
      }, head);
    }

which, given one = two[3]().four, would produce:

{
   "type": "Program",
   "body": [
      {
         "type": "VariableAssignment",
         "left": {
            "type": "Identifier",
            "name": "one"
         },
         "right": {
            "type": "MemberExpression",
            "object": {
               "type": "MemberExpression",
               "object": {
                  "type": "MemberExpression",
                  "object": {
                     "type": "Identifier",
                     "name": "two"
                  },
                  "property": [
                     {
                        "type": "Literal",
                        "value": 3,
                        "valType": "int"
                     }
                  ],
                  "computed": true
               },
               "property": {
                  "type": "FunctionCall",
                  "arg": []
               },
               "computed": true
            },
            "property": {
               "type": "Identifier",
               "name": "four"
            },
            "computed": false
         }
      }
   ]
}
于 2020-02-21T15:40:22.600 回答