我正在使用 Github3.py 从 Github 上的存储库中提取问题数据。以下是我从回购中提取问题的代码的一部分:
我在主代码中使用了这些库:
from github3 import login
from mysql.connector import IntegrityError
import config as cfg
import project_list
from github3.exceptions import NotFoundError
from github3.exceptions import GitHubException
import datetime
from database import Database
import sys
import re
import time
然后主要代码是:
DEBUG = False
def process(url, start):
re_pattern = re.compile(u'[^\u0000-\uD7FF\uE000-\uFFFF]', re.UNICODE)
splitted = url.split("/")
org_name = splitted[3]
repo_name = splitted[4]
while True:
try:
gh = login(token = cfg.TOKEN)
repo = gh.repository(org_name, repo_name)
print("{} =====================".format(repo))
if start is None:
i = 1
else:
i = int(start)
if start is None:
j = 1
else:
j = int(start)
Database.connect()
while True:
try:
issue = repo.issue(i)
issue_id = issue.id
issue_number = issue.number
status_issue = str(issue.state)
close_author = str(issue.closed_by)
com_count = issue.comments_count
title = re_pattern.sub(u'\uFFFD', issue.title)
created_at = issue.created_at
closed_at = issue.closed_at
now = datetime.datetime.now()
reporter = str(issue.user)
body_text = issue.body_text
body_html = issue.body_html
if body_text is None:
body_text = ""
if body_html is None:
body_html = ""
body_text = re_pattern.sub(u'\uFFFD', body_text)
body_html = re_pattern.sub(u'\uFFFD', body_html)
Database.insert_issues(issue_id, issue_number, repo_name,status_issue , close_author, com_count, title, reporter, created_at, closed_at, now, body_text, body_html)
print("{} inserted.".format(issue_id))
if DEBUG == True:
break;
except NotFoundError as e:
print("Exception @ {}: {}".format(i, str(e)))
except IntegrityError as e:
print("Data was there @ {}".format(str(e)))
i += 1
j += 1
except GitHubException as e:
print("Exception: {}".format(str(e)))
time.sleep(1000)
i -= 1
j -= 1
if __name__ == "__main__":
if len(sys.argv) == 1:
sys.exit("Please specify project name: python issue-github3.py <project name>")
if len(sys.argv) == 2:
start = None
print("Start from the beginning")
else:
start = sys.argv[2]
project = sys.argv[1]
url = project_list.get_project(project)
process(url, start)
使用上面的代码,对我来说一切正常,我可以从 GitHub 上的 repo 中提取问题。
问题:Exception: 410 Issues are disabled for this repo
从回购中成功提取 100 个问题后发生。
我该如何解决这个问题?
from github3.exceptions import NotFoundError
如主代码中所述,我使用以下代码的库修复了异常 404(即未找到问题) :
except NotFoundError as e:
print("Exception @ {}: {}".format(i, str(e)))
给定主要代码,我应该使用什么库和代码来修复异常 410?