我有一个由(县、州、年)元组组成的 CSV 文件,名为 ctyear.csv。
对于每个元组,我需要得到对应年份、县和州的快餐店数量(num_ff)
和杂货店数量(num_groc)。
这些信息以下列格式保存在按年份命名的人口普查数据文件中(例如 cbp96co.csv
指的是 1996 年的数据):
state, year, county, industry_code, number_of_establishments
'1', '1996', '55030', '30', '20'
'1', '1996', '55404', '31', '13'
'14', '1996' , '23', '5411', '24'
快餐店和杂货店的行业代码(也就是我想要提取信息的行业)每年都不同。辅助函数 whichcode()
(未包含在以下代码中)返回给定年份对应的行业代码。
给定一个年份,以及已打开的 CSV 写入句柄 rawff、rawgroc、fferrors 和 grocerrors,
以下函数为 ctyear 中的每个(县、州、年)元组查找杂货店和快餐店的数量(由该年的相应行业代码确定),
并将其写入上面提到的 CSV 写入句柄。
这个函数能打印出正确的结果;从这个意义上说,它是有效的,提取到的数据也是正确的。然而,它不会结束。当我调用它时,即使最后一行已经打印出来,该函数也不会终止。当我在终端中针对某一年调用此函数时,在函数的最后一行打印之后,终端在接下来的几个小时里只显示一个闪烁的光标——它似乎一直没有停止。可能出了什么问题?
def extractCBPdata(year, rawff, rawgroc, fferrors, grocerrors):
    """Write fast-food and grocery establishment counts for one year.

    Reads the census file "cbp<year>co.txt" and indexes its rows by
    (state, county, year, industry code). Then, for every row of
    'ctyear.csv' whose 'year' column equals getyear(year), each fast-food
    and each grocery code for that (state, county) is looked up in the
    index. A hit is written to rawff / rawgroc; a miss is written to
    fferrors / grocerrors.

    Args:
        year: Year identifier; used to build the census file name and
            passed to whichcode() and getyear().
        rawff: Object with a writerow(dict) method receiving rows keyed by
            'fipstate', 'fipscty', 'codename', 'code', 'year', 'num_ff'.
        rawgroc: Same as rawff, with 'num_groc' instead of 'num_ff'.
        fferrors: Object with a writerow(dict) method receiving rows keyed
            by 'fipstate', 'fipscty', 'year', 'ffcode'.
        grocerrors: Same as fferrors, with 'grocode' instead of 'ffcode'.

    Returns:
        None.
    """
    fname = "cbp" + str(year) + "co.txt"
    info = whichcode(year)
    codename = info['codename']
    groccode = info['groccode']
    ffcode = info['ffcode']
    # Computed once instead of on every lookup.
    # NOTE(review): assumes getyear() returns the same value for the same
    # argument on every call -- confirm against its definition.
    yr = getyear(year)

    # Index the census file by (state, county, year, code).
    cbpind = {}
    with open(fname, 'rb') as cbpfile:
        for line in csv.DictReader(cbpfile):
            key = (int(line['fipstate']), int(line['fipscty']), yr, line[codename])
            cbpind[key] = line

    # For every requested (state, county, year) tuple of this year, emit
    # one row per relevant industry code.
    with open('ctyear.csv', 'r') as ctfile:
        for c in csv.DictReader(ctfile):
            if c['year'] != yr:
                continue
            fipstate = int(c['fipstate'])
            fipscty = int(c['fipscty'])
            _write_counts(cbpind, fipstate, fipscty, yr, codename, ffcode,
                          'num_ff', 'ffcode', rawff, fferrors)
            _write_counts(cbpind, fipstate, fipscty, yr, codename, groccode,
                          'num_groc', 'grocode', rawgroc, grocerrors)

    # Parenthesised single-argument form prints the same text whether
    # print is a statement or a function.
    print("Done writing for " + str(year))
    return


def _write_counts(cbpind, fipstate, fipscty, yr, codename, codes,
                  count_field, error_field, writer, error_writer):
    """Write a count row for each code found in cbpind, else an error row."""
    for code in codes:
        record = cbpind.get((fipstate, fipscty, yr, code))
        if record is None:
            error_writer.writerow({
                'fipstate': fipstate,
                'fipscty': fipscty,
                'year': yr,
                error_field: code,
            })
            continue
        # State and county are taken from the census row itself, so they
        # are written exactly as they appear in the census file.
        writer.writerow({
            'fipstate': record['fipstate'],
            'fipscty': record['fipscty'],
            'codename': codename,
            # The code actually looked up for this row (the grocery branch
            # previously reused the loop variable of the fast-food loop).
            'code': code,
            'year': yr,
            count_field: record['est'],
        })
我使用以下几行代码来调用此函数:
# Driver: for each two-digit year, create the four output CSV files, write
# their header rows, and let extractCBPdata() fill them in.
g = ['num_groc', 'fipstate', 'fipscty', 'codename', 'code', 'year']
ff = ['num_ff', 'fipstate', 'fipscty', 'codename', 'code', 'year']
fferror_fields = ['fipstate', 'fipscty', 'year', 'ffcode']
grocerror_fields = ['fipstate', 'fipscty', 'year', 'grocode']
years = ['96', '97', '98', '99', '00', '01', '02', '03', '04', '05']

for y in years:
    # The output file names depend on the year, so the files are opened
    # inside the loop; the with-block closes (and therefore flushes) all
    # four files once the year has been processed.
    with open('raw_ff' + str(y) + '.csv', 'w') as ff_file, \
            open('raw_groc' + str(y) + '.csv', 'w') as groc_file, \
            open('fferrors' + str(y) + '.csv', 'w') as fferr_file, \
            open('grocerrors' + str(y) + '.csv', 'w') as grocerr_file:
        rawff = csv.DictWriter(ff_file, fieldnames=ff)
        rawgroc = csv.DictWriter(groc_file, fieldnames=g)
        fferrors = csv.DictWriter(fferr_file, fieldnames=fferror_fields)
        grocerrors = csv.DictWriter(grocerr_file, fieldnames=grocerror_fields)

        # Header row: each field name written under itself.
        grocerrors.writeheader()
        rawff.writeheader()
        rawgroc.writeheader()
        fferrors.writeheader()

        # extractCBPdata takes the year plus the four writers.
        extractCBPdata(y, rawff, rawgroc, fferrors, grocerrors)