我需要在不使用任何外部库的情况下解析如下所示的 ARFF 文件。我不确定如何将属性与数值关联起来。例如,我怎样才能表明每行中的第一个数值是年龄,而第二个是性别?您能否再给我一些用于解析类似场景的 Python 代码的链接?
@relation cleveland-14-heart-disease
@attribute 'age' real
@attribute 'sex' { female, male}
@attribute 'cp' { typ_angina, asympt, non_anginal, atyp_angina}
@attribute 'trestbps' real
@attribute 'chol' real
@attribute 'fbs' { t, f}
@attribute 'restecg' { left_vent_hyper, normal, st_t_wave_abnormality}
@attribute 'thalach' real
@attribute 'exang' { no, yes}
@attribute 'oldpeak' real
@attribute 'slope' { up, flat, down}
@attribute 'ca' real
@attribute 'thal' { fixed_defect, normal, reversable_defect}
@attribute 'class' { negative, positive}
@data
63,male,typ_angina,145,233,t,left_vent_hyper,150,no,2.3,down,0,fixed_defect,negative
37,male,non_anginal,130,250,f,normal,187,no,3.5,down,0,normal,negative
41,female,atyp_angina,130,204,f,left_vent_hyper,172,no,1.4,up,0,normal,negative
56,male,atyp_angina,120,236,f,normal,178,no,0.8,up,0,normal,negative
57,female,asympt,120,354,f,normal,163,yes,0.6,up,0,normal,negative
57,male,asympt,140,192,f,normal,148,no,0.4,flat,0,fixed_defect,negative
56,female,atyp_angina,140,294,f,left_vent_hyper,153,no,1.3,flat,0,normal,negative
44,male,atyp_angina,120,263,f,normal,173,no,0,up,0,reversable_defect,negative
52,male,non_anginal,172,199,t,normal,162,no,0.5,up,0,reversable_defect,negative
这是我编写的示例代码:
def _attribute_name(declaration):
    """Return the attribute name from one ``@attribute`` header line.

    The name may be quoted (``'age'``) or bare (``age``); surrounding quotes
    are removed. An empty string is returned when no name is present.
    """
    rest = declaration[len("@attribute"):].strip()
    if rest[:1] in ("'", '"'):
        # Quoted name: take everything up to the matching closing quote so
        # names containing spaces stay intact.
        closing = rest.find(rest[0], 1)
        if closing != -1:
            return rest[1:closing]
    fields = rest.split(None, 1)
    return fields[0] if fields else ""


def parse_arff(lines):
    """Split ARFF text into attribute names and data rows.

    Args:
        lines: Iterable of text lines, e.g. an open file object.

    Returns:
        A ``(names, rows)`` tuple. ``names`` lists the attribute names in
        declaration order. ``rows`` is a list of lists of value strings
        (values are not converted to numbers). The i-th value of a row
        belongs to the i-th name, so ``dict(zip(names, row))`` labels a row.
    """
    names = []
    rows = []
    for raw in lines:
        # strip() also removes a trailing "\r" from CRLF files.
        line = raw.strip()
        # Blank lines and "%" comment lines carry no data.
        if not line or line.startswith("%"):
            continue
        if line.startswith("@"):
            # Header line: only @attribute declarations are recorded;
            # @relation and @data are skipped.
            if line.lower().startswith("@attribute"):
                names.append(_attribute_name(line))
            continue
        rows.append([value.strip() for value in line.split(",")])
    return names, rows


if __name__ == "__main__":
    # The context manager closes the file even if parsing raises.
    with open("heart_train.arff") as arff_file:
        attributes, arr = parse_arff(arff_file)

    # One dict per row, keyed by attribute name: records[0]["age"] is the
    # first value of the first data row, records[0]["sex"] the second, etc.
    records = [dict(zip(attributes, row)) for row in arr]

    # NOTE: the slice starts at index 1, so the first parsed row is not
    # printed (kept from the original script).
    print(arr[1:30])
但是输出与我预期的非常不同:
[['37', 'male', 'non_anginal', '130', '250', 'f', 'normal', '187', 'no', '3.5', 'down', '0', 'normal', 'negative'], ['41', 'female', 'atyp_angina', '130', '204', 'f', 'left_vent_hyper', '172', 'no', '1.4', 'up', '0', 'normal', 'negative'], ['56', 'male', 'atyp_angina', '120', '236', 'f', 'normal', '178', 'no', '0.8', 'up', '0', 'normal', 'negative'], ['57', 'female', 'asympt', '120', '354', 'f', 'normal', '163', 'yes', '0.6', 'up', '0', 'normal', 'negative'], ['57', 'male', 'asympt', '140', '192', 'f', 'normal', '148', 'no', '0.4', 'flat', '0', 'fixed_defect', 'negative'], ['56', 'female', 'atyp_angina', '140', '294', 'f', 'left_vent_hyper', '153', 'no', '1.3', 'flat', '0', 'normal', 'negative'], ['44', 'male', 'atyp_angina', '120', '263', 'f', 'normal', '173', 'no', '0', 'up', '0', 'reversable_defect', 'negative'], ['52', 'male', 'non_anginal', '172', '199', 't', 'normal', '162', 'no', '0.5', 'up', '0', 'reversable_defect', 'negative'], ['57', 'male', 'non_anginal', '150', '168', 'f', 'normal', '174', 'no', '1.6', 'up', '0', 'normal', 'negative'], ['54', 'male', 'asympt', '140', '239', 'f', 'normal', '160', 'no', '1.2', 'up', '0', 'normal', 'negative'], ['48', 'female', 'non_anginal', '130', '275', 'f', 'normal', '139', 'no', '0.2', 'up', '0', 'normal', 'negative'], ['49', 'male', 'atyp_angina', '130', '266', 'f', 'normal', '171', 'no', '0.6', 'up', '0', 'normal', 'negative'], ['64', 'male', 'typ_angina', '110', '211', 'f', 'left_vent_hyper', '144', 'yes', '1.8', 'flat', '0', 'normal', 'negative'], ['58', 'female', 'typ_angina', '150', '283', 't', 'left_vent_hyper', '162', 'no', '1', 'up', '0', 'normal', 'negative'], ['50', 'female', 'non_anginal', '120', '219', 'f', 'normal', '158', 'no', '1.6', 'flat', '0', 'normal', 'negative'], ['58', 'female', 'non_anginal', '120', '340', 'f', 'normal', '172', 'no', '0', 'up', '0', 'normal', 'negative'], ['66', 'female', 'typ_angina', '150', '226', 'f', 'normal', '114', 'no', '2.6', 'down', '0', 'normal', 'negative'], ['43', 
'male', 'asympt', '150', '247', 'f', 'normal', '171', 'no', '1.5', 'up', '0', 'normal', 'negative'], ['69', 'female', 'typ_angina', '140', '239', 'f', 'normal', '151', 'no', '1.8', 'up', '2', 'normal', 'negative'], ['59', 'male', 'asympt', '135', '234', 'f', 'normal', '161', 'no', '0.5', 'flat', '0', 'reversable_defect', 'negative'], ['44', 'male', 'non_anginal', '130', '233', 'f', 'normal', '179', 'yes', '0.4', 'up', '0', 'normal', 'negative'], ['42', 'male', 'asympt', '140', '226', 'f', 'normal', '178', 'no', '0', 'up', '0', 'normal', 'negative'], ['61', 'male', 'non_anginal', '150', '243', 't', 'normal', '137', 'yes', '1', 'flat', '0', 'normal', 'negative'], ['40', 'male', 'typ_angina', '140', '199', 'f', 'normal', '178', 'yes', '1.4', 'up', '0', 'reversable_defect', 'negative'], ['71', 'female', 'atyp_angina', '160', '302', 'f', 'normal', '162', 'no', '0.4', 'up', '2', 'normal', 'negative'], ['59', 'male', 'non_anginal', '150', '212', 't', 'normal', '157', 'no', '1.6', 'up', '0', 'normal', 'negative'], ['51', 'male', 'non_anginal', '110', '175', 'f', 'normal', '123', 'no', '0.6', 'up', '0', 'normal', 'negative'], ['65', 'female', 'non_anginal', '140', '417', 't', 'left_vent_hyper', '157', 'no', '0.8', 'up', '1', 'normal', 'negative'], ['53', 'male', 'non_anginal', '130', '197', 't', 'left_vent_hyper', '152', 'no', '1.2', 'down', '0', 'normal', 'negative']]
您知道如何获得由 arff 库(来自 Weka)创建的如下输出吗?