1

我正在使用Ruby::Parslet

我正在解析类似于 SV 接口的文档,例如:

interface my_intf;
  protocol validonly;

  transmit  [Bool]   valid;
  transmit  [Bool]   pipeid;
  transmit  [5:0]    incr;
  transmit  [Bool]   sample;

endinterface

这是我的解析器:

# Parslet grammar from the question: an attempt at parsing SV-like
# `interface ... endinterface` descriptions.
# NOTE(review): Ruby class names must be constants (start with an uppercase
# letter), so `class myParse` is rejected by the Ruby parser as written.
class myParse < Parslet::Parser
  # Punctuation tokens. Each one swallows optional whitespace on BOTH sides,
  # so whitespace following a token is already consumed before the next rule
  # gets a chance to look for it.
  rule(:lparen)     { space? >> str('(') >> space? }
  rule(:rparen)     { space? >> str(')') >> space? }
  rule(:lbox)       { space? >> str('[') >> space? }
  rule(:rbox)       { space? >> str(']') >> space? }
  rule(:lcurly)     { space? >> str('{') >> space? }
  rule(:rcurly)     { space? >> str('}') >> space? }
  rule(:comma)      { space? >> str(',') >> space? }
  rule(:semicolon)  { space? >> str(';') >> space? }
  # Succeeds only when no input is left.
  rule(:eof)        { any.absent? }
  # One tab or one space ("\s" inside a double-quoted Ruby string is a space).
  rule(:space)      { match["\t\s"] }
  # `repeat` without arguments allows zero repetitions, so `whitespace` also
  # matches the empty string; `space?` wraps that in a further `maybe`.
  rule(:whitespace) { space.repeat }
  rule(:space?)     { whitespace.maybe }
  # Optional indentation followed by one or more newlines.
  rule(:blank_line) { space? >> newline.repeat(1) }
  rule(:newline)    { str("\n") }

  # Things
  # Neither :integer nor :identifier is referenced by the rules below.
  rule(:integer)    { space? >> match('[0-9]').repeat(1).as(:int) >> space? }
  rule(:identifier) { match['a-z'].repeat(1) }


  # Header line `interface <name>;` terminated by a newline; the name is
  # captured as :intf_name.
  rule(:intf_start)     { space? >> str('interface') >> space? >> (match['a-zA-Z_'].repeat(1,1) >> match['[:alnum:]_'].repeat(0)).as(:intf_name) >> space? >> str(';') >> space? >> str("\n") }
  # Line `protocol [validonly];`; the optional keyword is captured as :protocol.
  rule(:protocol)       { space? >> str('protocol') >> whitespace >> (str('validonly').maybe).as(:protocol) >> space? >> str(';') >> space? >> str("\n") }
  # Type spec `[Bool]`. `rbox` already consumes any trailing whitespace.
  rule(:bool)           { lbox >> space? >> str('Bool').as(:bool) >> space? >> rbox }
  # Type spec `[msb:lsb]`. The digit runs use bare `repeat`, so an empty
  # :msb or :lsb is accepted as well.
  rule(:transmit_width) { lbox >> space? >> match('[0-9]').repeat.as(:msb) >> space? >> str(':') >> space? >> match('[0-9]').repeat.as(:lsb) >> space? >> rbox }
  # Line `transmit <type> <name>;` terminated by a newline; the name is
  # captured as :transmit_name.
  rule(:transmit)       { space? >> str('transmit') >> whitespace >> (bool | transmit_width) >> whitespace >> (match['a-zA-Z_'].repeat(1,1) >> match['[:alnum:]_'].repeat(0)).as(:transmit_name) >> space? >> str(';') >> space? >> str("\n") }
  # Matches a single `protocol` line or an optional blank line. `transmit` is
  # not an alternative here and nothing repeats, so a multi-line body cannot
  # be consumed by this rule.
  rule(:interface_body) { (protocol | blank_line.maybe) }
  # Header followed by one body element; no rule matches `endinterface`.
  rule(:interface)      { intf_start >> interface_body }

  # Zero or more interfaces.
  rule(:expression)     { ( interface ).repeat }

  root :expression
end

我在编写 interface_body 规则时遇到了问题。

它可以包含 0 个或多个 transmit 行、0 个或 1 个 protocol 行,以及若干空白、注释等。

有人可以帮帮我吗?我在代码片段中编写的规则适用于单个 transmit 行和单个 protocol 行,即它们能正确匹配,但是当我解析整个 interface 时它不起作用。

提前致谢。

4

1 回答 1

1

好的...这会解析您提到的文件。我不明白所需的格式,所以我不能说它适用于你的所有文件,但希望这能让你开始。

require 'parslet'

# Parslet grammar for SV-like interface descriptions such as:
#
#   interface my_intf;
#     protocol validonly;
#
#     transmit  [Bool]   valid;
#     transmit  [5:0]    incr;
#
#   endinterface
#
# Whitespace policy: token rules consume optional whitespace on their LEFT
# side only, so a following rule that requires whitespace can still find it.
# Blank lines are accepted before, between and after interfaces as well as
# between the header, the body lines and `endinterface`.
class MyParse < Parslet::Parser
  # --- Punctuation (optional leading whitespace only) ----------------------
  rule(:lparen)     { space? >> str('(') }
  rule(:rparen)     { space? >> str(')') }
  rule(:lbox)       { space? >> str('[') }
  rule(:rbox)       { space? >> str(']') }
  rule(:lcurly)     { space? >> str('{') }
  rule(:rcurly)     { space? >> str('}') }
  rule(:comma)      { space? >> str(',') }
  rule(:semicolon)  { space? >> str(';') }

  # --- Whitespace ----------------------------------------------------------
  # Succeeds only when no input is left.
  rule(:eof)        { any.absent? }
  # One tab or one space ("\s" inside a double-quoted Ruby string is a space).
  rule(:space)      { match["\t\s"] }
  # At least one space/tab (mandatory separator).
  rule(:whitespace) { space.repeat(1) }
  # Any amount of spaces/tabs, including none.
  rule(:space?)     { space.repeat(0) }
  # Optional indentation followed by one or more newlines.
  rule(:blank_line) { space? >> newline.repeat(1) }
  rule(:newline)    { str("\n") }

  # Things
  rule(:integer)    { space? >> match('[0-9]').repeat(1).as(:int) >> space? }
  rule(:identifier) { match['a-z'].repeat(1) }

  # Wraps +expression+ into a full statement line: optional indentation, the
  # expression, an optional gap, `;`, optional trailing spaces and a newline.
  #
  # @param expression [Parslet::Atoms::Base] parser for the statement itself
  # @return [Parslet::Atoms::Base] parser for the complete line
  def line(expression)
    space? >>
      expression >>
      space? >>
      str(';') >>
      space? >>
      str("\n")
  end

  # Root: optional leading blank lines, then zero or more interfaces.
  rule(:expression?) { blank_line.repeat(0) >> interface.repeat(0) }

  # One `interface ... endinterface` block. Blank lines directly after the
  # header and any blank lines / trailing spaces after `endinterface` are
  # consumed here, so files that end with a newline or hold several
  # interfaces separated by empty lines parse completely. These extra atoms
  # capture nothing, so the resulting tree is unaffected.
  rule(:interface) {
    intf_start >>
      blank_line.repeat(0) >>
      interface_body.repeat(0) >>
      intf_end >>
      blank_line.repeat(0) >>
      space?
  }

  # One body line plus the blank lines after it. The explicit negative
  # lookahead makes a malformed body line fail right where it is instead of
  # letting the zero-length repetition succeed and the error surface later.
  rule(:interface_body) {
    intf_end.absent? >>
      interface_bodyline >>
      blank_line.repeat(0)
  }

  # Header line `interface <name>;`; the name is captured as :intf_name.
  rule(:intf_start) {
    line(str('interface') >> space? >> name.as(:intf_name))
  }

  # A `protocol ...;` or `transmit ...;` statement line.
  rule(:interface_bodyline) {
    line(protocol | transmit)
  }

  # `protocol [validonly]`; the optional keyword is captured as :protocol.
  rule(:protocol) {
    str('protocol') >> whitespace >>
      (str('validonly').maybe).as(:protocol)
  }

  # `transmit <type> <name>`; the name is captured as :transmit_name.
  rule(:transmit) {
    str('transmit') >> whitespace >>
      (bool | transmit_width) >> whitespace >>
      name.as(:transmit_name)
  }

  # Identifier: a letter or underscore followed by alphanumerics/underscores.
  rule(:name) {
    match('[a-zA-Z_]') >>
      (match['[:alnum:]'] | str("_")).repeat(0)
  }

  # Type spec `[Bool]`, captured as :bool.
  rule(:bool) { lbox >> str('Bool').as(:bool) >> rbox }

  # Type spec `[msb:lsb]`; both bounds need at least one digit.
  rule(:transmit_width) {
    lbox >>
      space? >>
      match('[0-9]').repeat(1).as(:msb) >>
      space? >>
      str(':') >>
      space? >>
      match('[0-9]').repeat(1).as(:lsb) >>
      space? >>
      rbox
  }

  # Closing keyword; indentation before it and spaces after it are allowed.
  rule(:intf_end) { space? >> str('endinterface') >> space? }

  root :expression?
end

  require 'rspec'
  require 'parslet/rig/rspec'

  # Bottom-up specs for the grammar: every example feeds one small input to a
  # single rule, using the `parse` matcher from parslet/rig/rspec.
  RSpec.describe MyParse do
    let(:parser) { described_class.new }

    context "simple_rule" do
      it "should consume protocol line" do
        rule = parser.interface_bodyline
        expect(rule).to parse("  protocol validonly;\n")
      end

      it 'name' do
        rule = parser.name
        expect(rule).to parse('valid')
      end

      it "bool" do
        rule = parser.bool
        expect(rule).to parse('[Bool]')
      end

      it "transmit line" do
        rule = parser.transmit
        expect(rule).to parse('transmit [Bool] valid')
      end

      it "transmit as bodyline'" do
        rule = parser.interface_bodyline
        expect(rule).to parse("  transmit  [Bool]   valid;\n")
      end
    end
  end

  # Run the examples defined above right away, with documentation-style output.
  RSpec::Core::Runner.run(['--format', 'documentation'])


# Parse test.txt with the grammar; on failure print Parslet's error tree,
# which shows every rule that was tried and where it stopped matching.
begin
  doc = File.read("test.txt")
  MyParse.new.parse(doc)
rescue Parslet::ParseFailed => error
  # Newer Parslet releases expose the failure tree as #parse_failure_cause;
  # there, #cause is Ruby's built-in Exception#cause and may be nil. Older
  # releases only provide #cause, so fall back to it when needed.
  failure = error.respond_to?(:parse_failure_cause) ? error.parse_failure_cause : error.cause
  puts failure.ascii_tree
end

主要变化...

  • 不要在记号(token)的两侧都吞掉空格。您原来的表达式会把 “[Bool] valid” 解析为 LBOX BOOL RBOX SPACE?,随后又期待一个 WHITESPACE,却找不到(因为前一个规则已经把它消耗掉了)。

  • 当一个表达式可以合法地匹配零长度(例如带有 repeat(0) 的规则)而它的写法又有问题时,您会得到一个奇怪的错误:该规则通过了,但什么也没有匹配,然后通常是下一个规则失败。我明确地把“正文行”匹配为“不是结束行”,这样它会在真正出错的地方报错。

  • 'repeat' 默认为 (0),我很想改变它。我一直看到这方面的错误。

  • x.repeat(1,1) 表示进行一次匹配。这与拥有 x 相同。:)

  • 有更多的空白问题

所以....

从上到下编写解析器,自下而上编写测试。当您的测试覆盖到最顶层的规则时,您就完成了!:)

祝你好运。

于 2016-09-05T13:19:46.470 回答