-1

我无法让这个程序工作。

'''
Tasks are as follows:
1. The code to clean up the raw data and to use this information in the graphics package (R Project)
2. A graph of the month of birth and the number of the Omphaloceles and the number of children with Gastroschisis. (The counts in the file may be given as separate values. Use the sum of these two conditions in your graph.)
3. A graph of the Educational level of the mother versus the birth weight of the infant
4. A graph of the age of the mother and the trimester (not the month) of the start of prenatal care
'''


import re
nat=open('D:\Documents\Project\Nat2010us\VS2010NATL.DETAILUS.PUB', mode='rt')

#lists
Revision=[]
MonthofBirth=[]
MaternalAge=[]
MaternalEducation=[]
MonthofStartofPrenatalCare=[]
BirthWeight=[]
CongenitalAnomalies=[]
OmphaloceleGastroschisis=[]
#encoded lists
enrevision=[]
enmonthofbirth=[]
enmaternalage=[]
enmaternaleducation=[]
enmonthofstartofprenatalcare=[]
enbirthweight=[]
encongenitalanomalies=[]
enomphalocelegastroschisis=[]

#selecting data, S is Unrevised data and A Revised. For Month of Start of Prenatal Care I chose the two columns (246 and 258) that were both found in Unrevised and Revised sets.
for x in nat:
    Revision.append(x[6])
    MonthofBirth.append(x[18:20])
    MaternalAge.append(x[88:90])
    if x[6]=="S": MonthofStartofPrenatalCare.append(x[246])
    if x[6]=="A": MonthofStartofPrenatalCare.append(x[258])
    BirthWeight.append(x[470:472])
    CongenitalAnomalies.append(x[512])
    OmphaloceleGastroschisis.append(x[760])
    if x[6]=="S": MaternalEducation.append(x[155:157])
    if x[6]=="A": MaternalEducation.append(x[154])
nat.close()


#encoding the data, using 'en' as noting encoded lists
for x in Revision:
    if x=="S": enrevision.append("U")
    if x=="A": enrevision.append("R")
for x in MonthofBirth:
    if x=="01": enmonthofbirth.append("January")
    if x=="02": enmonthofbirth.append("February")
    if x=="03": enmonthofbirth.append("March")
    if x=="04": enmonthofbirth.append("April")
    if x=="05": enmonthofbirth.append("May")
    if x=="06": enmonthofbirth.append("June")
    if x=="07": enmonthofbirth.append("July")
    if x=="08": enmonthofbirth.append("August")
    if x=="09": enmonthofbirth.append("September")
    if x=="10": enmonthofbirth.append("October")
    if x=="11": enmonthofbirth.append("November")
    if x=="12": enmonthofbirth.append("December")
for x in MaternalAge:
    if x=="12": enmaternalage.append("10-12")
    if x=="13": enmaternalage.append("13")
    if x=="14": enmaternalage.append("14")
    if x=="15": enmaternalage.append("15")
    if x=="16": enmaternalage.append("16")
    if x=="17": enmaternalage.append("17")
    if x=="18": enmaternalage.append("18")
    if x=="19": enmaternalage.append("19")
    if x=="20": enmaternalage.append("20")
    if x=="21": enmaternalage.append("21")
    if x=="22": enmaternalage.append("22")
    if x=="23": enmaternalage.append("23")
    if x=="24": enmaternalage.append("24")
    if x=="25": enmaternalage.append("25")
    if x=="26": enmaternalage.append("26")
    if x=="27": enmaternalage.append("27")
    if x=="28": enmaternalage.append("28")
    if x=="29": enmaternalage.append("29")
    if x=="30": enmaternalage.append("30")
    if x=="31": enmaternalage.append("31")
    if x=="32": enmaternalage.append("32")
    if x=="33": enmaternalage.append("33")
    if x=="34": enmaternalage.append("34")
    if x=="35": enmaternalage.append("35")
    if x=="36": enmaternalage.append("36")
    if x=="37": enmaternalage.append("37")
    if x=="38": enmaternalage.append("38")
    if x=="39": enmaternalage.append("39")
    if x=="40": enmaternalage.append("40")
    if x=="41": enmaternalage.append("41")
    if x=="42": enmaternalage.append("42")
    if x=="43": enmaternalage.append("43")
    if x=="44": enmaternalage.append("44")
    if x=="45": enmaternalage.append("45")
    if x=="46": enmaternalage.append("46")
    if x=="47": enmaternalage.append("47")
    if x=="48": enmaternalage.append("48")
    if x=="49": enmaternalage.append("49")
    if x=="50": enmaternalage.append("50-54")
for x in MonthofStartofPrenatalCare:
    if x=="1": enmonthofstartofprenatalcare.append("1st Trimester")
    if x=="2": enmonthofstartofprenatalcare.append("2nd Trimester")
    if x=="3": enmonthofstartofprenatalcare.append("3rd Trimester")
    if x=="4": enmonthofstartofprenatalcare.append("No Prenatal Care")
    if x=="5": enmonthofstartofprenatalcare.append("unknown or not stated")
    if x==" ": enmonthofstartofprenatalcare.append("not on certificate")
for x in BirthWeight:
    if x=="01": enbirthweight.append("499 or less")
    if x=="02": enbirthweight.append("500-999")
    if x=="03": enbirthweight.append("1000-1499")
    if x=="04": enbirthweight.append("1500-1999")
    if x=="05": enbirthweight.append("2000-2499")
    if x=="06": enbirthweight.append("2500-2999")
    if x=="07": enbirthweight.append("3000-3499")
    if x=="08": enbirthweight.append("3500-3999")
    if x=="09": enbirthweight.append("4000-4499")
    if x=="10": enbirthweight.append("4500-4999")
    if x=="11": enbirthweight.append("5000-8165")
    if x=="12": enbirthweight.append("not stated")
for x in CongenitalAnomalies:
    if x=="1": encongenitalanomalies.append("anomaly reported")
    if x=="2": encongenitalanomalies.append("anomaly not reported")
    if x=="9": encongenitalanomalies.append("anomaly not classified")
    if x==" ": encongenitalanomalies.append("not on certificate")
for x in OmphaloceleGastroschisis:
    if x=="0": enomphalocelegastroschisis.append("not reporting")
    if x=="1": enomphalocelegastroschisis.append("reporting")

#encoding the two different education codes to be coded the same
for x in range(0, len (MaternalEducation)):
    if Revision[x]=="A":
        if MaternalEducation[x]=="1": enmaternaleducation.append("8th grade or less")
        if MaternalEducation[x]=="2": enmaternaleducation.append("9th through 12th grade no diploma")
        if MaternalEducation[x]=="3": enmaternaleducation.append("High school graduate or GED completed")
        if MaternalEducation[x]=="4": enmaternaleducation.append("Some college credit but no degree")
        if MaternalEducation[x]=="5" or MaternalEducation[x]=="6": enmaternaleducation.append("Associate and/or Bachelor")
        if MaternalEducation[x]=="7" or MaternalEducation[x]=="8": enmaternaleducation.append("Master's or Doctorate")
        if MaternalEducation[x]=="9": enmaternaleducation.append("not stated")
        if MaternalEducation[x]==" ": enmaternaleducation.append("blank")        
    if Revision[x]=="S":
        if MaternalEducation[x]=="00" or MaternalEducation[x]=="01-08": enmaternaleducation.append("8th grade or less")
        if MaternalEducation[x]=="09" or MaternalEducation[x]=="10" or MaternalEducation[x]=="11": enmaternaleducation.append("9th through 12th grade no diploma")
        if MaternalEducation[x]=="12": enmaternaleducation.append("High school graduate or GED completed")
        if MaternalEducation[x]=="13": enmaternaleducation.append("Some college credit but no degree")
        if MaternalEducation[x]=="14" or MaternalEducation[x]=="15" or MaternalEducation[x]=="16": enmaternaleducation.append("Associate and/or Bachelor")
        if MaternalEducation[x]=="17": enmaternaleducation.append("Master's or Doctorate") 
        if MaternalEducation[x]=="99": enmaternaleducation.append("not stated")
        if MaternalEducation[x]==" ": enmaternaleducation.append("blank")

#open new file for output of data
'''
enmonthofbirth=[]
enmaternalage=[]
enmaternaleducation=[]
enmonthofstartofprenatalcare=[]
enbirthweight=[]
encongenitalanomalies=[]
enomphalocelegastroschisis=[]
'''
#write header and then, for each line, replace values in the Natality file with the encoded values from encoded lists (line by line) in the outputforR file. Matching up values based on commonality they all share - length.
#\n to end line to  break to new line
f=open('D:\Documents\Project\outputforR.csv', mode='w')

f.write('month of birth'+','+'maternal age'+','+'maternal education'+','+'month of start of prenatal care'+','+'birth weight'+','+'congenital anomalies'+','+'omphalocele/gastroschisis')

for x in range(0,len(enmonthofbirth)):
    f.write(enmonthofbirth[x]+','+enmaternalage[x]+','+enmaternaleducation[x]+','+enmonthofstartofprenatalcare[x]+','+enbirthweight[x]+','+encongenitalanomalies[x]+','+enomphalocelegastroschisis[x]) \n

f.close()

我似乎无法修复第 170 行。我以不同的方式弄乱了 \n 以尝试写入此文件,以便我可以继续将数据放入 R 中。我的标题将在 Excel 中写入和打开很好,但我无法让 Python 将每个列表中的值写入相应标题下方。我被告知我需要 \n 才能让它进入下一行,但它只会允许 \ 本身,这是行不通的。删除 \n 也不起作用。\ 并没有让我知道此链接中的确切内容。以下。

下面是我的 Excel 文件的图像的链接。

http://i.imgur.com/GDReiQh.png

4

1 回答 1

0

\n从以下行中删除:

f.write(enmonthofbirth[x]+','+...) \n
#                                 ^^^

更新

如果你想写,写成如下:

f.write(enmonthofbirth[x]+','+... + '\n')
于 2013-11-11T03:45:08.943 回答