我有一个 pyparsing 问题,我花了几天时间试图解决,但没有运气。
这是相关的伪代码:
class Parser(object):
def __init__(self):
self.multilineCommands = []
self.grammar = <pyparsing grammar> # depends on self.multilineCommands
所以,我试图让一组特定的文档测试通过。但是有问题的测试在self.multilineCommands
实例化后会更新。尽管正确设置属性没有问题,但self.grammar
似乎对更改视而不见,并且测试失败。
但是,如果我设置self.multilineCommands
inside __init__()
,那么测试都通过了。
我怎样才能了解self.grammar
最新消息self.multilineCommands
?
跟进
所以,这里的部分问题是我正在重构我没有编写的代码。我在 pyparsing 方面的经验也仅限于我在这个项目上的工作。
Pyparsing 作者 Paul McGuire 发布了一个有用的回复,但我无法让它发挥作用。这可能是我的错误,但更大的问题可能是我过度简化了上面编写的伪代码。
所以,我将发布实际代码。
警告!
您将要看到的内容是未经审查的。看到它可能会让你畏缩……甚至哭泣。在最初的模块中,这段代码只是整个“神级”的一部分。将下面的内容拆分到Parser
班级只是第 1 步(显然,第 1 步足以打破测试)。
class Parser(object):
'''Container object pyparsing-related parsing.
'''
def __init__(self, *args, **kwargs):
r'''
>>> c = Cmd()
>>> c.multilineCommands = ['multiline']
>>> c.multilineCommands
['multiline']
>>> c.parser.multilineCommands
['multiline']
>>> c.case_insensitive = True
>>> c.case_insensitive
True
>>> c.parser.case_insensitive
True
>>> print (c.parser('').dump())
[]
>>> print (c.parser('/* empty command */').dump())
[]
>>> print (c.parser('plainword').dump())
['plainword', '']
- command: plainword
- statement: ['plainword', '']
- command: plainword
>>> print (c.parser('termbare;').dump())
['termbare', '', ';', '']
- command: termbare
- statement: ['termbare', '', ';']
- command: termbare
- terminator: ;
- terminator: ;
>>> print (c.parser('termbare; suffx').dump())
['termbare', '', ';', 'suffx']
- command: termbare
- statement: ['termbare', '', ';']
- command: termbare
- terminator: ;
- suffix: suffx
- terminator: ;
>>> print (c.parser('barecommand').dump())
['barecommand', '']
- command: barecommand
- statement: ['barecommand', '']
- command: barecommand
>>> print (c.parser('COMmand with args').dump())
['command', 'with args']
- args: with args
- command: command
- statement: ['command', 'with args']
- args: with args
- command: command
>>> print (c.parser('command with args and terminator; and suffix').dump())
['command', 'with args and terminator', ';', 'and suffix']
- args: with args and terminator
- command: command
- statement: ['command', 'with args and terminator', ';']
- args: with args and terminator
- command: command
- terminator: ;
- suffix: and suffix
- terminator: ;
>>> print (c.parser('simple | piped').dump())
['simple', '', '|', ' piped']
- command: simple
- pipeTo: piped
- statement: ['simple', '']
- command: simple
>>> print (c.parser('double-pipe || is not a pipe').dump())
['double', '-pipe || is not a pipe']
- args: -pipe || is not a pipe
- command: double
- statement: ['double', '-pipe || is not a pipe']
- args: -pipe || is not a pipe
- command: double
>>> print (c.parser('command with args, terminator;sufx | piped').dump())
['command', 'with args, terminator', ';', 'sufx', '|', ' piped']
- args: with args, terminator
- command: command
- pipeTo: piped
- statement: ['command', 'with args, terminator', ';']
- args: with args, terminator
- command: command
- terminator: ;
- suffix: sufx
- terminator: ;
>>> print (c.parser('output into > afile.txt').dump())
['output', 'into', '>', 'afile.txt']
- args: into
- command: output
- output: >
- outputTo: afile.txt
- statement: ['output', 'into']
- args: into
- command: output
>>> print (c.parser('output into;sufx | pipethrume plz > afile.txt').dump())
['output', 'into', ';', 'sufx', '|', ' pipethrume plz', '>', 'afile.txt']
- args: into
- command: output
- output: >
- outputTo: afile.txt
- pipeTo: pipethrume plz
- statement: ['output', 'into', ';']
- args: into
- command: output
- terminator: ;
- suffix: sufx
- terminator: ;
>>> print (c.parser('output to paste buffer >> ').dump())
['output', 'to paste buffer', '>>', '']
- args: to paste buffer
- command: output
- output: >>
- statement: ['output', 'to paste buffer']
- args: to paste buffer
- command: output
>>> print (c.parser('ignore the /* commented | > */ stuff;').dump())
['ignore', 'the /* commented | > */ stuff', ';', '']
- args: the /* commented | > */ stuff
- command: ignore
- statement: ['ignore', 'the /* commented | > */ stuff', ';']
- args: the /* commented | > */ stuff
- command: ignore
- terminator: ;
- terminator: ;
>>> print (c.parser('has > inside;').dump())
['has', '> inside', ';', '']
- args: > inside
- command: has
- statement: ['has', '> inside', ';']
- args: > inside
- command: has
- terminator: ;
- terminator: ;
>>> print (c.parser('multiline has > inside an unfinished command').dump())
['multiline', ' has > inside an unfinished command']
- multilineCommand: multiline
>>> print (c.parser('multiline has > inside;').dump())
['multiline', 'has > inside', ';', '']
- args: has > inside
- multilineCommand: multiline
- statement: ['multiline', 'has > inside', ';']
- args: has > inside
- multilineCommand: multiline
- terminator: ;
- terminator: ;
>>> print (c.parser('multiline command /* with comment in progress;').dump())
['multiline', ' command /* with comment in progress;']
- multilineCommand: multiline
>>> print (c.parser('multiline command /* with comment complete */ is done;').dump())
['multiline', 'command /* with comment complete */ is done', ';', '']
- args: command /* with comment complete */ is done
- multilineCommand: multiline
- statement: ['multiline', 'command /* with comment complete */ is done', ';']
- args: command /* with comment complete */ is done
- multilineCommand: multiline
- terminator: ;
- terminator: ;
>>> print (c.parser('multiline command ends\n\n').dump())
['multiline', 'command ends', '\n', '\n']
- args: command ends
- multilineCommand: multiline
- statement: ['multiline', 'command ends', '\n', '\n']
- args: command ends
- multilineCommand: multiline
- terminator: ['\n', '\n']
- terminator: ['\n', '\n']
>>> print (c.parser('multiline command "with term; ends" now\n\n').dump())
['multiline', 'command "with term; ends" now', '\n', '\n']
- args: command "with term; ends" now
- multilineCommand: multiline
- statement: ['multiline', 'command "with term; ends" now', '\n', '\n']
- args: command "with term; ends" now
- multilineCommand: multiline
- terminator: ['\n', '\n']
- terminator: ['\n', '\n']
>>> print (c.parser('what if "quoted strings /* seem to " start comments?').dump())
['what', 'if "quoted strings /* seem to " start comments?']
- args: if "quoted strings /* seem to " start comments?
- command: what
- statement: ['what', 'if "quoted strings /* seem to " start comments?']
- args: if "quoted strings /* seem to " start comments?
- command: what
'''
# SETTINGS
self._init_settings()
# GRAMMAR
self._init_grammars()
# PARSERS
# For easy reference to all contained parsers.
# Hacky, I know. But I'm trying to fix code
# elsewhere at the moment... :P)
self._parsers = set()
self._init_prefixParser()
self._init_terminatorParser()
self._init_saveParser()
self._init_inputParser()
self._init_outputParser()
# intermission! :D
# (update grammar(s) containing parsers)
self.afterElements = \
pyparsing.Optional(self.pipe + pyparsing.SkipTo(self.outputParser ^ self.stringEnd, ignore=self.doNotParse)('pipeTo')) + \
pyparsing.Optional(self.outputParser('output') + pyparsing.SkipTo(self.stringEnd, ignore=self.doNotParse).setParseAction(lambda x: x[0].strip())('outputTo'))
self._grammars.add('afterElements')
# end intermission
self._init_blankLineTerminationParser()
self._init_multilineParser()
self._init_singleLineParser()
self._init_optionParser()
# Put it all together:
self.mainParser = \
( self.prefixParser +
( self.stringEnd |
self.multilineParser |
self.singleLineParser |
self.blankLineTerminationParser |
self.multilineCommand + pyparsing.SkipTo(
self.stringEnd,
ignore=self.doNotParse)
)
)
self.mainParser.ignore(self.commentGrammars)
#self.mainParser.setDebug(True)
# And we've got mainParser.
#
# SPECIAL METHODS
#
def __call__(self, *args, **kwargs):
'''Call an instance for convenient parsing. Example:
p = Parser()
result = p('some stuff for p to parse')
This just calls `self.parseString()`, so it's safe to
override should you choose.
'''
return self.parseString(*args, **kwargs)
def __getattr__(self, attr):
# REMEMBER: This is only called when normal attribute lookup fails
raise AttributeError('Could not find {0!r} in class Parser'.format(attr))
@property
def multilineCommands(self):
return self._multilineCommands
@multilineCommands.setter
def multilineCommands(self, value):
value = list(value) if not isinstance(value, list) else value
self._multilineCommands = value
@multilineCommands.deleter
def multilineCommands(self):
del self._multilineCommands
self._multilineCommands = []
#
# PSEUDO_PRIVATE METHODS
#
def _init_settings(self, *args, **kwargs):
self._multilineCommands = []
self.abbrev = True # recognize abbreviated commands
self.blankLinesAllowed = False
self.case_insensitive = True
self.identchars = cmd.IDENTCHARS
self.legalChars = u'!#$%.:?@_' + pyparsing.alphanums + pyparsing.alphas8bit
self.noSpecialParse = {'ed','edit','exit','set'}
self.redirector = '>' # for sending output to file
self.reserved_words = []
self.shortcuts = {'?' : 'help' ,
'!' : 'shell',
'@' : 'load' ,
'@@': '_relative_load'}
self.terminators = [';']
self.keywords = [] + self.reserved_words
def _init_grammars(self, *args, **kwargs):
# Basic grammars
self.commentGrammars = (pyparsing.pythonStyleComment|pyparsing.cStyleComment).ignore(pyparsing.quotedString).suppress()
self.commentInProgress = '/*' + pyparsing.SkipTo( pyparsing.stringEnd ^ '*/' )
self.doNotParse = self.commentGrammars | self.commentInProgress | pyparsing.quotedString
self.fileName = pyparsing.Word(self.legalChars + '/\\')
self.inputFrom = self.fileName('inputFrom')
self.inputMark = pyparsing.Literal('<')
self.pipe = pyparsing.Keyword('|', identChars='|')
self.stringEnd = pyparsing.stringEnd ^ '\nEOF'
# Complex grammars
self.multilineCommand = pyparsing.Or([pyparsing.Keyword(c, caseless=self.case_insensitive) for c in self.multilineCommands ])('multilineCommand')
self.multilineCommand.setName('multilineCommand')
self.oneLineCommand = ( ~self.multilineCommand + pyparsing.Word(self.legalChars))('command')
# Hack-y convenience access to grammars
self._grammars = {
# Basic grammars
'commentGrammars',
'commentInProgress',
'doNotParse',
'fileName',
'inputFrom',
'inputMark',
'noSpecialParse',
'pipe',
'reserved_words',
'stringEnd',
# Complex grammars
'multilineCommand',
'oneLineCommand'
}
self.inputFrom.setParseAction(replace_with_file_contents)
self.inputMark.setParseAction(lambda x: '')
self.commentGrammars.addParseAction(lambda x: '')
if not self.blankLinesAllowed:
self.blankLineTerminator = (pyparsing.lineEnd * 2)('terminator')
if self.case_insensitive:
self.multilineCommand.setParseAction(lambda x: x[0].lower())
self.oneLineCommand.setParseAction(lambda x: x[0].lower())
def _init_all_parsers(self):
self._init_prefixParser()
self._init_terminatorParser()
self._init_saveParser()
self._init_inputParser()
self._init_outputParser()
# intermission! :D
# (update grammar(s) containing parsers)
self.afterElements = \
pyparsing.Optional(self.pipe + pyparsing.SkipTo(self.outputParser ^ self.stringEnd, ignore=self.doNotParse)('pipeTo')) + \
pyparsing.Optional(self.outputParser('output') + pyparsing.SkipTo(self.stringEnd, ignore=self.doNotParse).setParseAction(lambda x: x[0].strip())('outputTo'))
self._grammars.setName('afterElements')
self._grammars.add('afterElements')
# end intermission
# FIXME:
# For some reason it's necessary to set this again.
# (Otherwise pyparsing results include `outputTo`, but not `output`.)
self.outputParser('output')
self._init_blankLineTerminationParser()
self._init_multilineParser()
self._init_singleLineParser()
self._init_optionParser()
def _init_prefixParser(self):
self.prefixParser = pyparsing.Empty()
self.prefixParser.setName('prefixParser')
self._parsers.add('prefixParser')
def _init_terminatorParser(self):
self.terminatorParser = pyparsing.Or([ (hasattr(t, 'parseString') and t) or pyparsing.Literal(t) for t in self.terminators])('terminator')
self.terminatorParser.setName('terminatorParser')
self._parsers.add('terminatorParser')
def _init_saveParser(self):
self.saveparser = (pyparsing.Optional(pyparsing.Word(pyparsing.nums)|'*')('idx') +
pyparsing.Optional(pyparsing.Word(self.legalChars + '/\\'))('fname') +
pyparsing.stringEnd)
self.saveparser.setName('saveParser')
self._parsers.add('saveParser')
def _init_outputParser(self):
# outputParser = (pyparsing.Literal('>>') | (pyparsing.WordStart() + '>') | pyparsing.Regex('[^=]>'))('output')
self.outputParser = self.redirector * 2 | (pyparsing.WordStart() + self.redirector) | pyparsing.Regex('[^=]' + self.redirector)('output')
self.outputParser.setName('outputParser')
self._parsers.add('outputParser')
def _init_inputParser(self):
# a not-entirely-satisfactory way of distinguishing < as in "import from" from <
# as in "lesser than"
self.inputParser = self.inputMark + \
pyparsing.Optional(self.inputFrom) + \
pyparsing.Optional('>') + \
pyparsing.Optional(self.fileName) + \
(pyparsing.stringEnd | '|')
self.inputParser.ignore(self.commentInProgress)
self.inputParser.setName('inputParser')
self._parsers.add('inputParser')
def _init_blankLineTerminationParser(self):
self.blankLineTerminationParser = pyparsing.NoMatch
if not self.blankLinesAllowed:
self.blankLineTerminationParser = ((self.multilineCommand ^ self.oneLineCommand) + pyparsing.SkipTo(self.blankLineTerminator, ignore=self.doNotParse).setParseAction(lambda x: x[0].strip())('args') + self.blankLineTerminator )
# FIXME: Does this call *really* have to be reassigned into the variable???
self.blankLineTerminationParser = self.blankLineTerminationParser.setResultsName('statement')
self.blankLineTerminationParser.setName('blankLineTerminationParser')
self._parsers.add('blankLineTerminationParser')
def _init_multilineParser(self):
#self.multilineParser = self.multilineParser.setResultsName('multilineParser')
self.multilineParser = (
(
(self.multilineCommand('multilineCommand') ^ self.oneLineCommand)
+ pyparsing.SkipTo(self.terminatorParser, ignore=self.doNotParse).setParseAction(lambda x: x[0].strip())('args')
+ self.terminatorParser
)('statement')
+ pyparsing.SkipTo(
self.outputParser ^ self.pipe ^ self.stringEnd, ignore=self.doNotParse
).setParseAction(lambda x: x[0].strip())('suffix')
+ self.afterElements)
self.multilineParser.ignore(self.commentInProgress)
self.multilineParser.setName('multilineParser')
self._parsers.add('multilineParser')
def _init_singleLineParser(self):
#self.singleLineParser = self.singleLineParser.setResultsName('singleLineParser')
self.singleLineParser = ((self.oneLineCommand + pyparsing.SkipTo(self.terminatorParser ^ self.stringEnd ^ self.pipe ^ self.outputParser, ignore=self.doNotParse).setParseAction(lambda x:x[0].strip())('args'))('statement') +
pyparsing.Optional(self.terminatorParser) + self.afterElements)
self.singleLineParser.setName('singleLineParser')
self._parsers.add('singleLineParser')
def _init_optionParser(self):
# Different from the other parsers.
# This one is based on optparse.OptionParser,
# not pyparsing.
#
# It's included here to keep all parsing-related
# code under one roof.
# TODO: Why isn't this using cmd2's OptionParser?
self.optionParser = optparse.OptionParser()
self._parsers.add('optionParser')
def parseString(self, *args, **kwargs):
'''Parses a string using `self.mainParser`.'''
return self.mainParser.parseString(*args, **kwargs)
你有它。残酷的事实。☺
2012-11-12 编辑:我在这个问题的原始标题中错误地使用了术语“类属性”。这是一个愚蠢的错误,对于任何混淆,我深表歉意。现在已更正为“实例属性”。