0

I have a file that has one sentence per line. I am trying to read the file, use a regex to check whether each sentence is a question, extract the wh-word from it, and save the extracted words to another file in the same order in which the sentences appeared in the first file.

This is what I have so far:

def whWordExtractor(inputFile):
    # Read inputFile line by line, search each line with the wh-word pattern,
    # and print the result of every search. Nothing is returned, and the code
    # that would write the results to whWord.txt is still commented out.
    try:
        openFileObject = open(inputFile, "r")
        try:

            # NOTE(review): `|` has the lowest precedence in a regex, so this
            # pattern is nine independent alternatives: `(.*)who`, `what`,
            # `how`, ..., `whose(\.*)`. The two groups do not wrap the whole
            # alternation, and there are no word boundaries, so e.g. "how"
            # also matches inside "show".
            whPattern = re.compile(r'(.*)who|what|how|where|when|why|which|whom|whose(\.*)', re.IGNORECASE)
            with openFileObject as infile:
                for line in infile:

                    # search() yields a match object, or None when nothing
                    # matches; the print below therefore shows that object,
                    # not the matched text (whWord.group() gives the text).
                    whWord = whPattern.search(line)
                    print whWord

# Save the whWord extracted from inputFile into another whWord.txt file
# NOTE(review): as written below, whQuestion would be the match object itself,
# so concatenating it with '\n' would fail; the matched string is needed here.
#                    writeFileObject = open('whWord.txt','a')                   
#                    if not whWord:
#                        writeFileObject.write('None' + '\n')
#                    else:
#                        whQuestion = whWord   
#                        writeFileObject.write(whQuestion+ '\n') 

        finally:
            # Runs whether or not the loop raised. The `with` block above has
            # already closed the file by the time this is reached, so the
            # explicit close() is redundant.
            print 'Done. All WH-word extracted.'
            openFileObject.close()
    except IOError:
        # An IOError from open() or from reading the file is discarded
        # silently, so a missing or unreadable input produces no message.
        pass

The result after running the code above: set([])

Is there something I am doing wrong here? I would be grateful if someone can point it out to me.

4

3 回答 3

1

像这样的东西:

def whWordExtractor(inputFile):
    """Extract the first wh-word from every line of *inputFile*.

    For each input line, in order, one line is appended to ``whWord.txt``
    in the current directory: the wh-word exactly as it is spelled in the
    sentence, or the text ``None`` when the line contains no wh-word. The
    same value is also printed.

    An IOError (for example a missing input file) is reported on standard
    output instead of being raised, so the call never propagates it.
    """
    # The alternatives are wrapped in one group because `|` binds more
    # loosely than anything else in a regex; ungrouped, a leading `(.*)`
    # would belong to `who` alone and drag the preceding text into the match.
    # The \b anchors keep "how" from matching inside "show" and stop "who"
    # from pre-empting "whom" and "whose".
    whPattern = re.compile(
        r'\b(?:who|what|how|where|when|why|which|whom|whose)\b',
        re.IGNORECASE)
    try:
        with open(inputFile) as f1:
            # Open the output file once rather than once per line.
            with open('whWord.txt', 'a') as f2:
                for line in f1:
                    whWord = whPattern.search(line)
                    # search() gives a match object or None; group() is the
                    # matched text itself.
                    whQuestion = whWord.group() if whWord else 'None'
                    print(whQuestion)
                    f2.write(whQuestion + '\n')
        print('Done. All WH-word extracted.')
    except IOError as err:
        # Report the failure instead of hiding it.
        print('Could not extract WH-words: %s' % err)
于 2013-05-15T05:42:24.277 回答
0

不确定它是否是你要找的,但你可以尝试这样的事情:

def whWordExtractor(inputFile):
    """Print the first wh-word of each line of *inputFile* that has one.

    Lines without a wh-word produce no output. The word is printed exactly
    as it is spelled in the line. An IOError (for example a missing input
    file) is reported on standard output instead of being raised.
    """
    # \b on both sides restricts matches to whole words, so "how" is not
    # found inside "show" and "who" does not win over "whom" or "whose".
    whPattern = re.compile(
        r'\b(?:who|what|how|where|when|why|which|whom|whose)\b',
        re.IGNORECASE)
    try:
        with open(inputFile, "r") as infile:
            for line in infile:
                whMatch = whPattern.search(line)
                if whMatch is None:
                    # no match
                    continue
                # group() is the matched text; the match object itself is
                # not a string.
                whWord = whMatch.group()
                print(whWord)
                # save to file
    except IOError as err:
        # Report the failure instead of hiding it.
        print('Could not extract WH-words: %s' % err)
于 2013-05-15T05:36:53.040 回答
0

将 '(.*)who|what|how|where|when|why|which|whom|whose(\.*)' 更改为 ".*(?:who|what|how|where|when|why|which|whom|whose).*\."

于 2013-05-15T05:19:05.590 回答