0

编写机器人爬取论坛并根据关键字通过电子邮件向用户发送报告。遇到一些麻烦。

也有点担心我的导入不正确,因为我正试图通过这个来混淆。

当我运行它时设置了这个错误(显然电子邮件信息是 ****):

    E:\>python dgcrbot.py
Traceback (most recent call last):
  File "dgcrbot.py", line 95, in <module>
    main()
  File "dgcrbot.py", line 91, in main
    Email('*****')
  File "dgcrbot.py", line 67, in __init__
    self.run()
  File "dgcrbot.py", line 87, in run
    self.send_message()
  File "dgcrbot.py", line 70, in send_message
    matches = Site('http://www.dgcoursereview.com/forums/forumdisplay.php?f=2')
  File "dgcrbot.py", line 23, in __init__
    self.check_posts()
  File "dgcrbot.py", line 55, in check_posts
    if any(pattern.lower() in title.lower() for pattern in patterns) and self.check_database(self, posts[title]) is False:
TypeError: check_database() takes 2 positional arguments but 3 were given

完全转储

import re
import sqlite3
import urllib.request
import html.parser
import smtplib
from bs4 import BeautifulSoup
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

patterns = [****]
data = sqlite3.connect('discgolf.db')
cur = data.cursor()
cur.execute('CREATE TABLE IF NOT EXISTS checked(id)')
data.commit()

server = smtplib.SMTP()
class Site(object):

    def __init__(self,forum):
        self.forum = forum
        self.check_posts()

    def get_url(self):
        posts = {}
        html = BeautifulSoup(urllib.request.urlopen(self.forum).read().decode('utf-8','ignore'),'html.parser')
        titles = html.find_all('td',class_='alt1')
        for title in titles:
            try:
                url = str(title)[re.search('<a href="',str(title)).span()[1]:]
                url = url[:re.search('">',url).span()[0]]
                url = url[re.search('amp;t=',url).span()[1]:]
                title = str(title)[re.search('title=',str(title)).span()[1]:re.search('">',str(title)).span()[0]]
                posts[title] = url 
            except:
                pass
        return posts

    def check_database(self, identity):
        cur.execute('SELECT * FROM checked WHERE id=?',[identity])
        if cur.fetchone():
            return True
        else:
            return False

    def submit_to_database(self, identity):
        cur.execute('INSERT INTO checked VALUES(?)',[identity])
        data.commit()

    def check_posts(self):
        posts = self.get_url()
        matches = {}
        for title in posts:
            if any(pattern.lower() in title.lower() for pattern in patterns) and self.check_database(self, posts[title]) is False:
                permalink = 'http://www.dgcoursereview.com/forums/showthread.php?t={}'.format(post[title])
                matches[title] = permalink
                self.submit_to_database(posts[title])
        return matches


class Email(object):

    def __init__(self, to_address, from_address='*****'):
        self.to_address = to_address
        self.from_address = from_address
        self.run()

    def send_message(self,subject='Found Match', body='N/A'):
        matches = Site('http://www.dgcoursereview.com/forums/forumdisplay.php?f=2')
        msg = MIMEMultipart()
        msg['From'] = self.from_address
        msg['To'] = self.to_address
        msg['Subject'] = DGCR - AutoBot
        body = ''
        for title in matches:
            body += '{} -- {}\n\n'.format(title,matches[title])
        msg.attach(MIMEText(body,'plain'))
        server = smtplib.SMTP('*****')
        server.starttls()
        server.login(self.from_address,'*****')
        text = msg.as_string()
        server.send_email(self.from_address, self.to_address, text)
        server.quit()

    def run(self):
        self.send_message()

def main():
    while True:
        Email('*****')
        time.sleep(10*60)

if __name__ == '__main__':
    main()
4

0 回答 0