我正在尝试使用如下模式标记字符串。
>>> splitter = re.compile(r'((\w*)(\d*)\-\s?(\w*)(\d*)|(?x)\$?\d+(\.\d+)?(\,\d+)?|([A-Z]\.)+|(Mr)\.|(Sen)\.|(Miss)\.|.$|\w+|[^\w\s])')
>>> splitter.split("Hello! Hi, I am debating this predicament called life. Can you help me?")
我得到以下输出。有人可以指出我需要纠正什么吗?我对一堆“无”感到困惑。此外,如果有更好的方法来标记字符串,我真的很感激额外的帮助。
['', 'Hello', None, None, None, None, None, None, None, None, None, None, '', '!', None, None, None, None, None, None, None, None, None, None, ' ', 'Hi', None,None, None, None, None, None, None, None, None, None, '', ',', None, None, None, None, None, None, None, None, None, None, ' ', 'I', None, None, None, None, None, None, None, None, None, None, ' ', 'am', None, None, None, None, None, None,None, None, None, None, ' ', 'debating', None, None, None, None, None, None, None, None, None, None, ' ', 'this', None, None, None, None, None, None, None, None, None, None, ' ', 'predicament', None, None, None, None, None, None, None, None, None, None, ' ', 'called', None, None, None, None, None, None, None, None, None, None, ' ', 'life', None, None, None, None, None, None, None, None, None, None, '', '.', None, None, None, None, None, None, None, None, None, None, ' ', 'Can', None, None, None, None, None, None, None, None, None, None, ' ', 'you', None, None, None, None, None, None, None, None, None, None, ' ', 'help', None, None,None, None, None, None, None, None, None, None, ' ', 'me', None, None, None, None, None, None, None, None, None, None, '', '?', None, None, None, None, None, None, None, None, None, None, '']
我想要的输出是: -
['Hello', '!', 'Hi', ',', 'I', 'am', 'debating', 'this', 'predicament', 'called', 'life', '.', 'Can', 'you', 'help', 'me', '?']
谢谢你。