1

我正在尝试为 UCB Logo 语言编写一个基于词法单元(token)的 Scala 解析器。我遇到的问题是:在 UCB Logo 中,列表里的各个表达式只能由 ']'、'[' 或 ' '(空格)之一分隔。如果出现任何其他类型的分隔符,列表中的内容应被视为一个单词。

简而言之,我如何制作一个将考虑以下内容的标记解析器:
[ 4 3 2 ]- 应该是一个列表
[ [ 4 3 2 ] ]- 应该是一个列表中的一个列表
[ 1 + 2 ] - 应该是一个列表中的一个词
[ [ 1 2 3 ] + ] - 应该是一个列表中的一个词

以下

 '[' ~ rep(chrExcept('[', ']')) ~ ']'

对于输入 [ [ 1 2 3 ] + ],上面的解析器产生这些令牌:List([, [1 2 3], +, ])

我认为它应该产生令牌:List([, [1 2 3] +, ]),即把 + 符号与令牌 [1 2 3] 合并。

这是我目前使用的词法分析器(lexical)的代码:

package lexical

import scala.language.postfixOps

import scala.util.parsing.combinator.lexical.Lexical
import scala.util.parsing.input.CharSequenceReader._

/**
 * Lexer for Logo source text, built on the parser-combinator `Lexical` base.
 *
 * Turns a character stream into `Token`s: bracketed forms, formal parameters
 * (`:name`), identifiers / keywords, numbers, quoted words and delimiters.
 *
 * `reserved` and `delimiters` must be populated by the user of this class
 * before the first token is scanned, because the delimiter parser is built
 * lazily from `delimiters` on first use and is not rebuilt afterwards.
 *
 * Created by Marin on 28/03/16.
 */
class MyLexical extends Lexical with MyTokens {

  /**
   * Parser for a single token. Alternatives are tried in order; the first
   * one that succeeds wins.
   *
   * Note: `procDef` is defined below but is intentionally not wired in here.
   */
  def token: Parser[Token] = (
    // Bracketed form: keep the text that was actually matched instead of a
    // placeholder, and do not print from inside the lexer.
    word2 ^^ { case opens ~ body ~ delims ~ closes =>
      StringLit(opens.mkString + body.mkString + delims.map(_.chars).mkString + closes.mkString)
    }
    // `formalChar` yields a (':' ~ firstChar) pair; destructure it so the token
    // text is ":name" rather than the pair's toString ("(:~n)ame").
    | formalChar ~ rep(identChar | digit) ^^ { case colon ~ head ~ rest => Formal((colon :: head :: rest).mkString) }
    | identChar ~ rep(identChar | digit) ^^ { case head ~ rest => processIdent((head :: rest).mkString) }
    | procDigit ^^ { case whole ~ dot ~ frac => NumericLit(whole.mkString + dot.fold("")(_.toString) + frac.mkString) }
    // Quoted word: a double quote followed by everything up to the next
    // whitespace character or end of input. The terminator is not consumed;
    // `whitespace` skips it before the next token.
    // NOTE(review): '[' and ']' are not treated as terminators here - confirm
    // against the Logo dialect being targeted.
    | elem('\"') ~> rep(quotedWordChar) ^^ { chars => StringLit(chars.mkString) }
    | EofCh ^^^ EOF
    | delim
    | failure("Illegal character")
  )

  /** A character that may appear inside a quoted word: anything that is neither whitespace (as `whitespaceChar` defines it, i.e. <= ' ') nor the end-of-input marker. */
  private def quotedWordChar: Parser[Char] =
    elem("word character", ch => ch > ' ' && ch != EofCh)

  /**
   * Builds the identifier token for a newly defined procedure.
   *
   * @param chars the procedure name
   * @return an `Identifier` carrying `chars`
   * @throws RuntimeException if `chars` is one of the `reserved` names
   */
  def processNewProcedure(chars: String) =
    if (reserved.contains(chars))
      throw new RuntimeException(s"Cannot define procedure '$chars': the name is reserved")
    else
      Identifier(chars)

  /**
   * Procedure definition: `to`, a name, any number of formal parameters, `end`.
   * Only the first name character and the remaining name characters are kept;
   * the formals and the `to` / `end` markers are discarded.
   */
  def procDef = toSeq ~> identChar ~ rep(identChar | elem('_')) <~ formalChar.* <~ endSeq

  /** Matches the characters `to` and yields the string "to". */
  def toSeq = 't' ~ 'o' ^^^ "to"

  /** Matches the characters `end` and yields the string "end". */
  def endSeq = 'e' ~ 'n' ~ 'd' ^^^ "end"

  /**
   * Classifies a scanned name.
   *
   * @param name the scanned identifier text
   * @return a `Keyword` when `name` is in `reserved`, otherwise an `Identifier`
   */
  def processIdent(name: String) =
    if (reserved contains name) Keyword(name)
    else Identifier(name)

  /** Experimental: `[`, whitespace/digits, one delimiter or identifier character, whitespace/digits, `]`. */
  def word =
    '[' ~ rep(whitespaceChar | digit) ~ (_delim | identChar) ~ rep(whitespaceChar | digit) ~ ']'

  /**
   * Bracketed form used by `token`: one or more `[`, one or more characters
   * other than brackets, any number of delimiters, one or more `]`.
   */
  def word2 =
    rep1(elem('[')) ~ rep1(chrExcept('[', ']') | digit) ~ rep(_delim) ~ rep1(elem(']'))

  /** Experimental: the digits, letters and delimiters found between a single `[` and `]`. */
  def word3 =
    '[' ~> rep(digit | letter | _delim) <~ ']'

  /** Number: one or more digits, an optional '.', then any number of digits. */
  def procDigit = rep1(digit) ~ opt(elem('.')) ~ rep(digit)

  /** A character allowed in identifiers: a letter or an underscore. */
  def identChar = letter | elem('_')

  /** Start of a formal parameter: ':' followed by one identifier character. */
  def formalChar = ':' ~ identChar

  /** Skippable input between tokens: whitespace characters and `;` comments. */
  override def whitespace: Parser[Any] = rep[Any](
    whitespaceChar
    | ';' ~ comment
  )

  /**
   * Body of a `;` comment: everything up to, but not including, the end of the
   * line (or end of input). Stopping at the newline keeps a comment from
   * swallowing the lines that follow it.
   */
  def comment: Parser[Any] = rep(chrExcept(EofCh, '\n')) ^^^ ' '


  /****** Adapted from the standard lexer ******/

  /** The set of reserved identifiers: these will be returned as `Keyword`s. */
  val reserved = new scala.collection.mutable.HashSet[String]

  /** The set of delimiters (ordering does not matter). */
  val delimiters = new scala.collection.mutable.HashSet[String]

  private lazy val _delim: Parser[Token] = {
    // Construct the parser for delimiters by |'ing together the parsers for the
    // individual delimiters, starting with the longest one -- otherwise a
    // delimiter D would never be matched if another delimiter is a prefix of D.
    def parseDelim(s: String): Parser[Token] = accept(s.toList) ^^ { _ => Keyword(s) }

    // Sorted ascending, then folded from the right with `acc | next`, so the
    // resulting alternation tries the entries in descending order.
    val ascending = delimiters.toList.sorted
    ascending.map(parseDelim).foldRight(failure("no matching delimiter"): Parser[Token])((next, acc) => acc | next)
  }

  /** Parser for any one of the configured `delimiters`, yielding a `Keyword`. */
  protected def delim: Parser[Token] = _delim
}
4

0 回答 0