0

运行此代码以在夜间运行中获取课堂提交。像这样的另一个代码运行良好,但这个代码似乎在批处理后随机停止在随机位置。我正在使用云 sql 存储分配以在循环时循环遍历没有问题,因为对循环进行计数会显示所有提交。

它在第 12 到第 17 批周围循环,并且毫无例外地停止并显示它已完成。这种行为似乎没有理由?我们有大约 180,000 个作业,但没有完成。

from sqlalchemy import create_engine
import psycopg2
from google.oauth2 import service_account
import json
import base64
# import httplib2
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import pandas as pd
from pandas.io.json import json_normalize
from google.auth.transport.requests import AuthorizedSession
from apiclient.http import BatchHttpRequest
import httplib2
import time
from apiclient.discovery import build
import pandas as pd
import random
from googleapiclient.errors import HttpError
import email, smtplib, ssl
from email import encoders
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import socket
socket.setdefaulttimeout(100000)

start = time.time()
http = httplib2.Http()

log = []
array = []
batch_array = []
assignments = []
listwithtokens = []
listwithtokenstokens = []

credentials = service_account.Credentials.from_service_account_info(
    {
    },
    scopes=['https://www.googleapis.com/auth/admin.directory.user', 'https://www.googleapis.com/auth/classroom.courses',
            'https://www.googleapis.com/auth/classroom.rosters',
            'https://www.googleapis.com/auth/classroom.coursework.students'

            ]
)

email_sub = 'sa-gsync2@academiesenterprisetrust.org'
delegated_credentials = credentials.with_subject(email_sub)
service_two = build('classroom', 'v1', credentials=delegated_credentials)

engine = create_engine('postgresql+psycopg2://USER:PASSWORD@IP:PORT/DQ')
sql = '''SELECT "courseId", id FROM public.classroom_assignments;'''

def callback(request_id, response, exception):
    ids = request_id.split('|')
    if exception is not None:
        print('Error getting assignments "{0}" for course: "{1}"'.format(request_id, exception))
        tokendata = {'courseId': ids[0], 'id' :ids[1],'token': None}
        listwithtokens.append(tokendata)
    else:
        assignments.extend(response.get("studentSubmissions", []))
        nextPageToken = response.get("nextPageToken", None)
        if nextPageToken:
            tokendata = {'courseId': ids[0], 'id' :ids[1],'token': None}
            listwithtokens.append(tokendata)
        else:
            pass

with engine.connect() as con:
    try:
        rs = con.execute(sql)
        bcount = 0
        i = 0
        batch_n = 0
        batch = service_two.new_batch_http_request(callback=callback)
        start_time = time.time()
        for row in rs:
            i += 1
            response = service_two.courses().courseWork().studentSubmissions().list(pageToken=None, courseId=row[0] , courseWorkId =row[1], pageSize=500)
            ids = f"{row[0]}|{row[1]}"
            batch.add(response, request_id=ids)
            bcount += 1
            if bcount == 999:
                batch_n += 1
                print(batch_n)
                batch.execute()
                batch = service_two.new_batch_http_request(callback=callback)
                bcount = 0
                end = time.time() - start_time
                print('batch count',i)
                if end < 60:
                    diff = 60 - end
                    print(diff, 'seconds to sleep')
                    time.sleep(60)
                    start_time = time.time()
        if bcount > 0:
            batch.execute()
           
    except HttpError as err:
        if err.resp.get('content-type', '').startswith('application/json'):
            print(err.content)

这是错误<HttpError 502 when requesting classroom.googleapis.com/batch returned "Bad Gateway">

4

0 回答 0