1

我正在尝试从Web读取一些JSON文件并使用这些数据创建一个SQL数据库。我正在使用ijson将数据作为流读取。但是当代码失败时,我需要重新开始检索数据。有没有办法从程序失败的地方继续读取 JSON 文件?

我可以使用 json.loads 读取整个文档,但我假设数据太大而无法一次读取。

你可以在下面看到我的代码。

import sqlite3
import ssl
import urllib.request
import json
import ijson
import time

ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

conn = sqlite3.connect('rawdata.sqlite')
cur = conn.cursor()

cur.execute('''DROP TABLE IF EXISTS DailyData ''')
cur.execute('''DROP TABLE IF EXISTS Countries ''')
cur.execute('''DROP TABLE IF EXISTS Continents ''')


cur.execute('''CREATE TABLE IF NOT EXISTS DailyData
    (id INTEGER, Day TEXT, Month TEXT, Year TEXT, country_id INTEGER, continent_id INTEGER, Cases TEXT, Deaths TEXT)''')

cur.execute('''CREATE TABLE IF NOT EXISTS Countries
    (id INTEGER,     CountryCode TEXT UNIQUE, Country TEXT UNIQUE, Population TEXT, continent_id INTEGER)''')

cur.execute('''CREATE TABLE IF NOT EXISTS Continents
    (id INTEGER, Continent TEXT UNIQUE)''')

url = "https://opendata.ecdc.europa.eu/covid19/casedistribution/json/"
f = urllib.request.urlopen(url, context=ctx)
reports = ijson.items(f, 'records.item')

sum = 0
count = 0
# error = 0
for item in reports :

    iDataRep = item.get('dateRep')
    iCases = item.get('cases')
    iDeaths = item.get('deaths')
    iCountryCode = item.get('countryterritoryCode')
    iCountry = item.get('countriesAndTerritories')
    iPopulation = item.get('popData2018')
    iContinent = item.get('continentExp')

    if len(iDataRep) < 0: iDataRep = 0
    if len(iCases) < 0: iCases = 0
    if len(iDeaths) < 0: iDeaths = 0
    if len(iCountryCode) < 0: iCountryCode = 0
    if len(iCountry) < 0: iCountry = 0
    if len(iPopulation) < 0: iPopulation = 0
    if len(iContinent) < 0: iContinent = 0

    Spl = iDataRep.split('/')
    iDay = Spl[0]
    iMonth = Spl[1]
    iYear = Spl[2]
    id = count + 1

    cur.execute('''INSERT OR IGNORE INTO Continents (id, Continent)
        VALUES ( ?, ? )''', (id, iContinent))

    cur.execute('''SELECT id FROM Continents WHERE Continent = ? ''', (iContinent, ))
    continent_id = cur.fetchone()[0]

    cur.execute('''INSERT OR IGNORE INTO Countries (id, CountryCode, Country, Population, continent_id)
        VALUES ( ?, ?, ?, ?, ? )''', (id, iCountryCode, iCountry, iPopulation, continent_id) )

    cur.execute('''SELECT id FROM Countries WHERE Country = ? ''', (iCountry, ))
    country_id = cur.fetchone()[0]

    cur.execute('''INSERT OR IGNORE INTO DailyData (id, Day, Month, Year, country_id, continent_id, Cases, Deaths)
        VALUES ( ?, ?, ?, ?, ?, ?, ? ,?)''', (id, iDay, iMonth, iYear, country_id, continent_id, iCases, iDeaths) )

    conn.commit()

    # except:
    #     error = error + 1
    #     print(error)
    #     continue

    count = count + 1

    print(count, 'data retrieved...')

    if count % 95 == 0:
        time.sleep(1)
        print('Program slept a second.')

numCountry = cur.execute('SELECT max(id) FROM Countries' )
numContinent = cur.execute('SELECT max(id) FROM Continents' )

print('From', numCountry, 'different countries and', numContinent, 'continents', count, 'data retrieved.')

cur.close()
4

0 回答 0