0

我需要绑定标点符号功能,以便在打印文件中的文本时不使用标点符号。取一行:"How are you today?"

到目前为止,打印:

"how
are
you
today?"

但我想像这样打印它:

how
are
you
today

我的代码如下所示:

from scanner import *
import sys
import string

def processFile(filename):
    s = Scanner(filename)
    token = s.readtoken()
    array = []
    while token != "":
        newToken = ""
        for i in range(0,len(token),1):
            newchar = RawChar(token[i])
            newToken = newToken + newchar
        array.append(newToken)
        token = s.readtoken()
    s.close()
    return array

def eachLine(tokens):
    for i in range(0,len(tokens),1):
        pun(tokens[i])
        print(tokens[i])
    return

def pun(string):
    punctuation = ["`","~","!","@","#","$","%","^","&","*","(",")","_","-","+","=","{","[","}","]","|",":",";","\"","'","<",",",">",".","?","/"]
    for i in string:
        newString = ""
        if i not in string:
            newString = newString + i
    return newString

def RawChar(char):
    if char == "A":
        char = "a"
    elif char == "B":
        char = "b"
    elif char == "C":
        char = "c"
    elif char == "D":
        char = "d"
    elif char == "E":
        char = "e"
    elif char == "F":
        char = "f"
    elif char == "G":
        char = "g"
    elif char == "H":
        char = "h"
    elif char == "I":
        char = "i"
    elif char == "J":
        char = "j"
    elif char == "K":
        char = "k"
    elif char == "L":
        char = "l"
    elif char == "M":
        char = "m"
    elif char == "N":
        char = "n"
    elif char == "O":
        char = "o"
    elif char == "P":
        char = "p"
    elif char == "Q":
        char = "q"
    elif char == "R":
        char = "r"
    elif char == "S":
        char = "s"
    elif char == "T":
        char = "t"
    elif char == "U":
        char = "u"
    elif char == "V":
        char = "v"
    elif char == "W":
        char = "w"
    elif char == "X":
        char = "x"
    elif char == "Y":
        char = "y"
    elif char == "Z":
        char = "z"
    return char

def main():
    newForm = processFile(sys.argv[1])
    eachLine(newForm)

main()

有什么建议放在哪里def pun(string)

4

3 回答 3

7

要从字符串中删除标点符号,请使用str.translate

In [124]: import string

In [126]: string.punctuation
Out[126]: '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [127]: '"How are you today?"'.translate(None, string.punctuation)
Out[127]: 'How are you today'
于 2013-03-17T02:47:23.680 回答
1

您可以使用此 stackoverflow 文章中显示的技术显着改进标点符号剥离。然后使用 s.lower() 将字符串 s 小写。

于 2013-03-17T02:48:20.950 回答
0
import string
s = '"Right now!" she shouted, and hands fluttered in the air - amid a few cheers - for about two minutes.'
x = "".join([c for c in s if or c not in string.punctuation])
于 2013-07-22T06:57:58.787 回答