0

我在用下面的代码读取 PDF 表格数据时遇到一个问题:当某个单元格为空时,代码会把下一个单元格的值当作该单元格的值。

例如:一个学生有三门科目的成绩。科目 1 的等级为“A”,科目 2 的等级为空(“”),科目 3 的等级为“c”。

这里科目 2 没有等级值(单元格为空),但程序却把科目 3 的值“c”当成了科目 2 的值。

我该如何解决这个问题..?

请帮我 ...

    import tabula

def _is_header_row(row):
    """Return True when *row* is the first header row of a result table.

    A header row either has an empty first cell or carries the literal
    label "Subject Code - >" in its second cell.  The decision is made for
    the whole row up front so that no header cell is ever mistaken for a
    registration number.
    """
    if row and row[0]['text'] == '':
        return True
    return len(row) > 1 and row[1]['text'] == "Subject Code - >"


def _store_student_row(row, subject_codes, student_subject_grade):
    """Record one student row into *student_subject_grade* (mutated in place).

    Cell 0 is the registration number, cell 1 the student name, and every
    later cell is the grade for ``subject_codes[col - 2]``.  Cells that have
    no matching subject code (for example trailing filler cells) are ignored
    instead of raising ``IndexError``.
    """
    registration_number = None
    for col, cell in enumerate(row):
        text = cell['text']
        if col == 0:
            registration_number = text
            student_subject_grade.setdefault(registration_number, {})
        elif col == 1:
            student_subject_grade[registration_number]['name'] = text
        elif col - 2 < len(subject_codes):
            student_subject_grade[registration_number][subject_codes[col - 2]] = text


def _collect_student_grades(tables):
    """Build ``{registration_number: {'name': ..., subject_code: grade}}``.

    *tables* is an iterable of dicts whose ``'data'`` entry is a list of
    rows, each row being a list of cells with a ``'text'`` key.

    Grades are matched to subject codes purely by column position, so every
    row must contain one cell per column, empty or not.
    """
    student_subject_grade = {}
    # Subject codes deliberately survive from one table to the next: a table
    # that does not start with a header row reuses the previous header.
    subject_codes = []
    for table in tables:
        page_beginning = False
        subject_split = False
        for table_row, row in enumerate(table['data'], start=1):
            if table_row == 1:
                page_beginning = _is_header_row(row)
                if page_beginning:
                    subject_codes = [cell['text'] for cell in row[2:]]
                    continue
            elif page_beginning and table_row == 2:
                # An empty first cell means the subject codes wrapped onto a
                # second line; glue the continuation onto each code.  Any
                # other second row (the "Reg. Number" label row) is skipped.
                subject_split = bool(row) and row[0]['text'] == ''
                if subject_split:
                    for index, cell in enumerate(row[2:]):
                        if index < len(subject_codes):
                            subject_codes[index] += cell['text']
                continue
            elif page_beginning and subject_split and table_row == 3:
                # With wrapped codes the label row moves down to row 3.
                continue
            _store_student_row(row, subject_codes, student_subject_grade)
    return student_subject_grade


def _build_performance_records(student_subject_grade, debug_registration_number):
    """Turn the per-student grade mapping into ``(0, 0, values)`` tuples.

    Subjects whose grade is the empty string produce no record.  Progress
    output is printed to stdout; full details are printed only for
    *debug_registration_number*.
    """
    university_performance_ids = []
    for total_students, (registration_number, details) in enumerate(
            student_subject_grade.items(), start=1):
        is_debug_student = registration_number == debug_registration_number
        if is_debug_student:
            print("---------------------------------------------------------------------------")
            print("%s %s" % (total_students, registration_number))
            print("---------------------------------------------------------------------------")
            print("%s %s" % (details, "--------------------------------------------------------------------------"))

        # A row with a single cell never sets a name; fall back to ''.
        student_name = details.get('name', '')

        for subject_code, grade in details.items():
            if is_debug_student:
                print("%s : %s" % (subject_code, grade))
            if subject_code == 'name':
                continue
            if grade != '':
                university_performance_ids.append((0, 0, {
                    'registration_number': registration_number,
                    'student_name': student_name,
                    'subject_code': subject_code,
                    'grade': grade,
                }))

        print("------------------------------------------------------------------------")
    return university_performance_ids


def readpdf(pdf_path="/tmp/university_exam_results.pdf", tables=None,
            debug_registration_number='953413114041'):
    """Read exam-result tables from a PDF and return per-subject grade records.

    Args:
        pdf_path: PDF file handed to ``tabula.read_pdf`` when *tables* is not
            supplied.  Defaults to the originally hard-coded location.
        tables: Optional pre-extracted tables in the JSON-style layout
            (list of dicts with a ``'data'`` list of rows of ``{'text': ...}``
            cells).  When given, the PDF is not read at all.
        debug_registration_number: Registration number whose details are
            printed in full while the records are built.

    Returns:
        A list of ``(0, 0, values)`` tuples, one per student and non-empty
        grade, where ``values`` holds ``registration_number``,
        ``student_name``, ``subject_code`` and ``grade``.

    NOTE(review): grades are assigned by column position.  If the extraction
    omits an empty cell, every later grade in that row shifts one subject to
    the left.  Presumably that has to be addressed at extraction time (e.g.
    by trying tabula's lattice/stream options) and passing the result in via
    *tables* -- confirm against the actual PDF.

    The code runs unchanged on Python 2 and Python 3.
    """
    if tables is None:
        tables = tabula.read_pdf(pdf_path, output_format="json", pages="all")
    student_subject_grade = _collect_student_grades(tables)
    return _build_performance_records(student_subject_grade,
                                      debug_registration_number)
4

0 回答 0