0

我正在尝试使用 Twisted 从 Gmail 帐户获取电子邮件,说得委婉一点,这个过程很痛苦:我找不到对电子邮件及其结构的清晰解释(现有的资料充其量像是东拼西凑出来的)。我想获取附件,但在返回的数据中到处都找不到附件。

我使用的是 Twisted 自带的示例 IMAP 客户端(略有修改),通过 fetchAll('1:*') 获取电子邮件,然后取第一封电子邮件,但在这封邮件上找不到附件(我已在 Gmail 中确认该邮件确实带有附件)。另外,'1:*' 到底是什么意思?我似乎找不到任何能真正讲清楚电子邮件的资料(好像没有人真正理解它)。

所以 Stackoverflow,我错过了什么?

代码

#!/usr/bin/env python

# Copyright (c) Twisted Matrix Laboratories.
# See LICENSE for details.


"""
    Simple IMAP4 client which displays the subjects of all messages in a
    particular mailbox.
    """

import sys

from twisted.internet import protocol
from twisted.internet import ssl
from twisted.internet import defer
from twisted.internet import stdio
from twisted.mail import imap4
from twisted.protocols import basic
from twisted.python import util
from twisted.python import log



class TrivialPrompter(basic.LineReceiver):
    """
    Line-based stdio helper that asks one question at a time.

    ``prompt`` writes a message and hands back a Deferred; the next line
    received fires that Deferred with the line's contents.
    """
    from os import linesep as delimiter

    # Deferred for the question currently awaiting an answer, or None.
    promptDeferred = None

    def display(self, msg):
        """Write ``msg`` to the transport without waiting for a reply."""
        self.transport.write(msg)

    def prompt(self, msg):
        """
        Show ``msg`` and return a Deferred that fires with the next line.

        Only one prompt may be outstanding at a time.
        """
        assert self.promptDeferred is None
        self.display(msg)
        pending = defer.Deferred()
        self.promptDeferred = pending
        return pending

    def lineReceived(self, line):
        """Deliver ``line`` to the outstanding prompt; ignore it otherwise."""
        pending = self.promptDeferred
        if pending is not None:
            # Clear the slot before firing so the callback may prompt again.
            self.promptDeferred = None
            pending.callback(line)



class SimpleIMAP4Client(imap4.IMAP4Client):
    """
    IMAP4 client that fires a one-shot Deferred when ``serverGreeting`` runs.
    """
    # Deferred fired with this protocol instance by ``serverGreeting``;
    # None when unset or already fired.
    greetDeferred = None

    def serverGreeting(self, caps):
        """Store ``caps`` and fire ``greetDeferred`` (at most once)."""
        self.serverCapabilities = caps
        pending = self.greetDeferred
        if pending is None:
            return
        # Drop the reference first so the Deferred cannot be fired twice.
        self.greetDeferred = None
        pending.callback(self)



class SimpleIMAP4ClientFactory(protocol.ClientFactory):
    """
    Single-use factory that builds one ``SimpleIMAP4Client`` and reports
    the outcome of the connection attempt through ``onConn``.
    """
    # Set once buildProtocol has run; the factory must not be reused.
    usedUp = False

    protocol = SimpleIMAP4Client

    def __init__(self, username, onConn):
        """
        Remember the account name and the Deferred to notify.

        ``onConn`` is handed to the protocol as its ``greetDeferred`` and
        is errbacked here if the connection attempt fails.
        """
        self.username = username
        self.onConn = onConn
        self.ctx = ssl.ClientContextFactory()

    def buildProtocol(self, addr):
        """
        Create the protocol instance and register authenticators.

        Server capabilities are not inspected; every authenticator used
        below is registered unconditionally. Note carried over from the
        original example: Gmail reportedly uses XOAUTH rather than any of
        these mechanisms.
        """
        assert not self.usedUp
        self.usedUp = True

        client = self.protocol(self.ctx)
        client.factory = self
        client.greetDeferred = self.onConn

        # Registration order is kept: PLAIN, LOGIN, CRAM-MD5.
        authenticator_types = (
            imap4.PLAINAuthenticator,
            imap4.LOGINAuthenticator,
            imap4.CramMD5ClientAuthenticator,
        )
        for make_authenticator in authenticator_types:
            client.registerAuthenticator(make_authenticator(self.username))

        return client

    def clientConnectionFailed(self, connector, reason):
        """Errback ``onConn`` with ``reason`` and drop our reference to it."""
        pending = self.onConn
        self.onConn = None
        pending.errback(reason)



def cbServerGreeting(proto, username, password):
    """
    First callback in the chain; receives the protocol as ``proto``.

    Attaches a stdio prompter to ``proto`` and starts authentication with
    ``password``. ``username`` is only forwarded to the errback.
    """
    prompter = TrivialPrompter()
    stdio.StandardIO(prompter)

    # Expose the prompter's helpers on the protocol for later callbacks.
    proto.display = prompter.display
    proto.prompt = prompter.prompt

    d = proto.authenticate(password)
    d.addCallback(cbAuthentication, proto)
    # Runs if authenticate() or cbAuthentication fails.
    d.addErrback(ebAuthentication, proto, username, password)
    return d


def ebConnection(reason):
    """
    Last-resort errback: log the failure to stdout and pass it on.

    Returns ``reason`` unchanged so later handlers in the chain still
    receive it.
    """
    # Logging is started here, just before the error is reported.
    log.startLogging(sys.stdout)
    log.err(reason)
    return reason


def cbAuthentication(result, proto):
    """
    Callback run after a successful authentication or login.

    Requests the mailbox list (reference "", pattern "*") and forwards it
    to ``cbMailboxList``. ``result`` is unused.
    """
    d = proto.list("", "*")
    d.addCallback(cbMailboxList, proto)
    return d


def ebAuthentication(failure, proto, username, password):
    """
    Errback for a failed authentication attempt.

    Only ``imap4.NoSupportedAuthentication`` is handled; any other failure
    is re-raised by ``trap``. In the handled case the user is asked whether
    to fall back to an insecure login (the original example notes that
    Gmail users will end up here).
    """
    failure.trap(imap4.NoSupportedAuthentication)

    question = "No secure authentication available. Login insecurely? (y/N) "
    d = proto.prompt(question)
    d.addCallback(cbInsecureLogin, proto, username, password)
    return d


def cbInsecureLogin(result, proto, username, password):
    """
    Handle the answer to the "insecure login" prompt.

    Anything other than "y" (case-insensitive) aborts with a failed
    Deferred; "y" performs a plain login and continues with
    ``cbAuthentication``.
    """
    if result.lower() != "y":
        return defer.fail(Exception("Login failed for security reasons."))

    # The user agreed, so log in with username and password.
    d = proto.login(username, password)
    d.addCallback(cbAuthentication, proto)
    return d


def cbMailboxList(result, proto):
    """
    Callback for the mailbox listing.

    Takes element 2 of every entry in ``result`` as the mailbox name,
    shows a numbered menu (starting at 1) and asks the user to pick one.
    Fails if the listing is empty.
    """
    names = [entry[2] for entry in result]
    if not names:
        return defer.fail(Exception("No mailboxes exist on server!"))

    menu = '\n'.join(
        '%d. %s' % (number, name) for number, name in enumerate(names, 1))
    d = proto.prompt(menu + "\nWhich mailbox? [1] ")
    d.addCallback(cbPickMailbox, proto, names)
    return d


def cbPickMailbox(result, proto, mboxes):
    """
    "Examine" the mailbox the user picked from the numbered menu.

    ``result`` is the line typed by the user (1-based menu number); an
    empty or blank answer selects entry 1. ``mboxes`` is the list of
    mailbox names in menu order.

    Raises ValueError if the answer is not a number and IndexError if it
    is outside 1..len(mboxes). The explicit range check stops "0" or a
    negative number from silently selecting a mailbox from the end of the
    list through negative indexing.
    """
    answer = result.strip() if result else result
    choice = int(answer) if answer else 1
    if not 1 <= choice <= len(mboxes):
        raise IndexError(
            "Mailbox selection %d is out of range (1-%d)."
            % (choice, len(mboxes)))

    d = proto.examine(mboxes[choice - 1])
    d.addCallback(cbExamineMbox, proto)
    return d


def cbExamineMbox(result, proto):
    """
    Callback run once the examine command has completed.

    Issues ``fetchAll`` for the message set '1:*' and passes the response
    to ``cbFetch``. ``result`` is unused.
    """
    # NOTE(review): fetchAll reportedly returns envelope/metadata only, so
    # attachment bodies will not appear in this result -- confirm against
    # the Twisted IMAP4Client documentation.
    d = proto.fetchAll('1:*')
    d.addCallback(cbFetch, proto)
    return d


def cbFetch(result, proto):
    """
    Display the entry with the largest key in ``result``, then log out.

    ``result`` is treated as a mapping (presumably message number ->
    fetched data; confirm against the fetch call that produced it). An
    empty or missing result prints a notice instead.

    Returns whatever ``proto.logout()`` returns.
    """
    if result:
        # max() picks the same key the old sort-then-take-last code did,
        # without requiring dict.keys() to be a sortable list.
        last = max(result)
        proto.display('%s %s' % (last, result[last]))
    else:
        # Parenthesised form works as both a statement and a function call.
        print("Hey, an empty mailbox!")

    return proto.logout()


def cbClose(result):
    """
    Final step of the chain: stop the reactor.

    ``result`` is ignored, so this runs the same way after success or
    failure.
    """
    # Imported here rather than at module level, as in the original.
    from twisted.internet import reactor as active_reactor
    active_reactor.stop()


def main():
    """
    Prompt for connection details, connect and run the reactor.

    Port 993 is connected over SSL; any other port uses plain TCP, and an
    empty answer means 143. The port answer is stripped and converted once
    so that input such as "993 " still selects SSL instead of falling
    through to a plaintext connection on the SSL port.

    Raises ValueError if the port answer is not a number.
    """
    hostname = raw_input('IMAP4 Server Hostname: ')
    port_text = raw_input(
        'IMAP4 Server Port (the default is 143, 993 uses SSL): ').strip()
    username = raw_input('IMAP4 Username: ')
    password = util.getPassword('IMAP4 Password: ')

    port = int(port_text) if port_text else 143

    # Greeting -> authentication chain; errors are logged, and the reactor
    # is stopped afterwards either way.
    onConn = defer.Deferred()
    onConn.addCallback(cbServerGreeting, username, password)
    onConn.addErrback(ebConnection)
    onConn.addBoth(cbClose)

    factory = SimpleIMAP4ClientFactory(username, onConn)

    from twisted.internet import reactor
    if port == 993:
        reactor.connectSSL(hostname, port, factory, ssl.ClientContextFactory())
    else:
        reactor.connectTCP(hostname, port, factory)
    reactor.run()


# Run the interactive client only when this file is executed as a script.
if __name__ == '__main__':
    main()
4

2 回答 2

5

首先,IMAP4 是一个(也许过于)复杂的邮件处理协议,而为了完整支持该协议,Twisted 的客户端实现也(必然)很复杂。要想了解更多信息,您应该考虑花一些时间阅读定义该协议的标准 RFC 3501,以及 Twisted API 文档的相关部分。

话虽如此,看起来您使用的是 IMAP4Client.fetchAll();与其名称给人的印象相反,它获取的是“信封”数据,即消息的标头和元数据。返回电子邮件正文的类似调用实际上是 fetchFull()。

于 2013-01-04T04:49:09.703 回答
0

感谢 SingleNegationElimination 和 Jean-Paul 对 RFC3501 和 RFC822 的指针,我想我对如何实现这一点有了更好的理解。

我找到的解决方案是使用 imap4 的 fetchSpecific。您可以指定要检索邮件(即收件箱中的某封邮件)的哪个“部分”。对于附件,附加文件的内容嵌入在 TEXT 部分中,并使用 base64 编码。在我的具体例子中,邮件带有一个 PDF 附件,并且

# Fetch only the TEXT part of the message with UID 247 and hand the result to cbViewAttachment.
proto.fetchSpecific(imap4.MessageSet(247), uid=True, headerType='TEXT').addCallback(cbViewAttachment, proto)

def cbViewAttachment(result, proto):
    """
    Print the first 400 characters of element [0][4] of every value in
    ``result``.

    ``proto`` is unused in this excerpt.
    """
    for parts in result.values():
        preview = parts[0][4][:400]
        print(preview)
        ...

给出类似的东西

------_CANON_2007111239350128_
Content-Type: Application/pdf;
 name="0128_20200711123935_001.pdf"
Content-Disposition: attachment;
 filename="0128_20200711123935_001.pdf"
Content-Transfer-Encoding: base64

JVBERi0xLjYKJeLjz9MNCjEgMCBvYmoKPDwgCi9DcmVhdGlvbkRhdGUgKEQ6MjAyMDA3MTEyMDM5
MThaMDAnMDAnKQovQ3JlYXRvciAoXDM3NlwzNzdcMDAwQ1wwMDBhXDAwMG5cMDAwb1wwMDBuXDAw
MCBcMDAwTVwwMDBGXDAwMDJcMDAwNFw

简单解释一下。上面的代码获取的是 UID = 247 的消息的 TEXT 部分。fetchSpecific 调用成功后,将调用 cbViewAttachment,其第一个参数是一个由 TEXT 部分组成的字典——每条指定的消息对应一个条目。在这个例子中只指定了一条消息,因此 result 是一个只包含一个条目的字典,该条目对应 UID = 247 的消息。

实际的附件内容嵌入在该条目值的 [0][4] 位置,看起来就像上面的输出。很明显,JVB... 这一部分就是 base64 编码的内容。我试着用 base64 解码了开头的一小段,结果类似于

%PDF-1.6\n%\xe2\xe3

这意味着这实际上是 PDF 文件的开头。

然后,您可以将内容写入本地文件,以便“下载”附件。在上面的例子中,它基本上是

# Decode the base64 payload and write the raw bytes to a local file.
# NOTE(review): the offset 213 is hard-coded for this one message; other
# messages will need the start of the payload located by parsing.
with open('your/file/name', 'wb') as f:
    f.write(base64.urlsafe_b64decode(value[0][4][213:]))

请注意,这里把 213 硬编码为附件内容的起始位置。在实际应用中,您一定要解析 TEXT 部分来确定这个位置。

希望这对和我有类似情况的人有所帮助。

于 2020-07-15T00:11:51.940 回答