1

将 outlook.msg 正文提取到文本文件

def getEmailBodyFromMsg():
    """Write the body of every ``*.msg`` file in the current directory to a text file.

    For each Outlook message file matched by ``glob.glob('*.msg')``, the
    PR_BODY property is read through an IStream, decoded, and saved as UTF-8
    to ``<message base name><YYYY-MM-DD HH_MM_SS>.txt`` in the current
    working directory.

    The function takes no arguments and returns ``None``; its only effect is
    the text files it writes.

    Raises:
        UnicodeDecodeError: if a body stream is not valid UTF-16LE.
    """
    mapi.MAPIInitialize((mapi.MAPI_INIT_VERSION, 0))
    storage_flags = (
        win32com.storagecon.STGM_DIRECT
        | win32com.storagecon.STGM_READ
        | win32com.storagecon.STGM_SHARE_EXCLUSIVE
    )
    chunk_size = 10000

    for filepath in glob.glob('*.msg'):
        base_name = os.path.splitext(ntpath.basename(filepath))[0]
        result_file = base_name + datetime.now().strftime('%Y-%m-%d %H_%M_%S') + ".txt"

        # Open the .msg structured-storage file and wrap it as a MAPI message.
        storage = pythoncom.StgOpenStorage(filepath, None, storage_flags, None, 0)
        mapi_session = mapi.OpenIMsgSession()
        message = mapi.OpenIMsgOnIStg(mapi_session, None, storage, None, 0, mapi.MAPI_UNICODE)

        # Read the body stream in raw chunks until the stream is exhausted.
        stream = message.OpenProperty(win32com.mapi.mapitags.PR_BODY, pythoncom.IID_IStream, 0, 0)
        chunks = []
        while True:
            chunk = stream.read(chunk_size)
            if not chunk:
                break
            chunks.append(chunk)

        # The stream yields raw bytes, not text. With MAPI_UNICODE the body
        # comes back as UTF-16LE (observed as 'R\x00e\x00s\x00...' when the
        # bytes were written out undecoded), so decode before writing.
        # Decoding once over the joined data avoids splitting a surrogate
        # pair across a chunk boundary.
        text = b"".join(chunks).decode('utf-16-le')

        with codecs.open(result_file, mode='w', encoding='utf-8') as a_file:
            a_file.write(text)

打开上面写入的文件搜索文本时,读到的行如下所示:

    # Re-open the text file and scan it line by line for the result pattern.
    # NOTE(review): absFilepath and csResultEmailPattern are defined outside
    # this excerpt — confirm they refer to the extracted .txt file and the
    # intended regex.
    # codecs.open decodes the file as UTF-8, so each `line` is a unicode string.
    with  codecs.open(absFilepath, 'rb', encoding='utf-8') as inFile :
        for index, line in enumerate(inFile) :
            # re.UNICODE requests Unicode-aware matching; search() returns
            # None when the pattern does not occur in the line.
            mymatch =  re.search(csResultEmailPattern, line, re.UNICODE)

#line = 'R\x00e\x00s\x00u\x00l\x00t\x00s\x00 \x00f\x00r\x00o\x00m\x00\n' #OR line = u'R\x00e\x00s\x00u\x00l\x00t\x00s\x00 \x00f\x00r\x00o\x00m\x00\r'

我想知道是否有一种有效的方法来指定正则表达式(例如 resultEmailPattern = ur'Results'),使其能够匹配上面的 'R\x00e\x00...' 这样的行;或者是否有更好的方式来编码 txt 文件。

4

0 回答 0