python - csv文件比较差异

Question

我有两个 csv 文件，我可以让程序打印出列表，其中包含从 csv 文件中汇总的总数。我使用的代码如下：

 import csv 
 import difflib

 # Open the first input and wrap it in a csv reader.
 # NOTE(review): presumably Python 2, where the csv module wants binary mode; confirm before porting.
 # NOTE(review): this handle is never explicitly closed; `file` is simply rebound later for test2.csv.
 file  = open('test1.csv',"rb") # Open the CSV file for reading in binary mode
 reader = csv.reader(file)      # Reader object that yields the file's rows one at a time

 class Object:
     """One (name, produce) combination together with its running total."""

     def __init__(self, name, produce, amount):
         self.name = name
         self.produce = produce
         self.amount = amount


 rownum = 0  # Index of the row currently being read
 list = []   # Accumulated Object records, in first-seen order.
             # NOTE: shadows the builtin `list`; the name is kept because the
             # rest of the script refers to it.


 def checkList(name, produce, amount):
     """Add `amount` to the record for (name, produce), creating it if needed.

     `amount` may be a string or an int; it is converted with int(), so a
     non-numeric value raises ValueError. The module-level `list` is
     mutated in place and nothing is returned.
     """
     amount = int(amount)
     for record in list:
         # A record is identified by the name/produce combination.
         if record.name == name and record.produce == produce:
             record.amount += amount
             return

     # No existing combination matched: start a new record.
     list.append(Object(name, produce, amount))


 for row in reader:  # Walk every row of the CSV file
     if rownum == 0:
         header = row  # Keep the header row apart from the data rows
     else:
         name = row[0]     # Store name
         produce = row[1]  # Store produce
         amount = row[2]   # Store amount

         # checkList creates the record when the combination is new and
         # adds to it otherwise, so an empty list needs no special case.
         checkList(name, produce, amount)

     # Must advance once per row, otherwise every row is taken for the header.
     rownum += 1

 # NOTE: every pass rebinds file1, so after the loop it holds only the
 # (name, produce, amount) tuple of the last record; nothing is written to disk.
 for each in list: 
  file1 =  each.name, each.produce, each.amount # End of the test1.csv section


 # Open the second input and wrap it in a csv reader.
 # NOTE(review): rebinding `file` here drops the reference to the test1.csv handle without closing it.
 file  = open('test2.csv',"rb") # Open the CSV file for reading in binary mode
 reader = csv.reader(file)      # Reader object that yields the file's rows one at a time

 class Object:
     """One (name, produce) combination together with its running total."""

     def __init__(self, name, produce, amount):
         self.name = name
         self.produce = produce
         self.amount = amount


 rownum = 0  # Reset the row index before reading test2.csv
 list = []   # Fresh accumulator of Object records for test2.csv.
             # NOTE: shadows the builtin `list`; the name is kept because the
             # rest of the script refers to it.


 def checkList(name, produce, amount):
     """Add `amount` to the record for (name, produce), creating it if needed.

     `amount` may be a string or an int; it is converted with int(), so a
     non-numeric value raises ValueError. The module-level `list` is
     mutated in place and nothing is returned.
     """
     amount = int(amount)
     for record in list:
         # A record is identified by the name/produce combination.
         if record.name == name and record.produce == produce:
             record.amount += amount
             return

     # No existing combination matched: start a new record.
     list.append(Object(name, produce, amount))


 for row in reader:  # Walk every row of the CSV file
     if rownum == 0:
         header = row  # Keep the header row apart from the data rows
     else:
         name = row[0]     # Store name
         produce = row[1]  # Store produce
         amount = row[2]   # Store amount

         # checkList creates the record when the combination is new and
         # adds to it otherwise, so an empty list needs no special case.
         checkList(name, produce, amount)

     # Must advance once per row, otherwise every row is taken for the header.
     rownum += 1

 # NOTE: every pass rebinds file2, so after the loop it holds only the
 # (name, produce, amount) tuple of the last record; nothing is written to disk.
 for each in list: 
   file2 = each.name, each.produce, each.amount # End of the test2.csv section

所有这一切都很好，我提供它只是为了让你可以看到我在做什么。

所以现在我需要了解我创建的两个新文件之间的区别。这就是我卡住的地方；我试过下面的代码，但没有成功：

 # NOTE: the comma after the first closing parenthesis makes `diff` a 2-tuple:
 # (difflib.ndiff('file1', "rb"), ('file2', "rb")). ndiff is handed the literal
 # strings 'file1' and "rb", not the file1/file2 values built earlier.
 diff=difflib.ndiff('file1',"rb"), ('file2',"rb")
 try:
    while 1:
        # A tuple has no .next() method, so this raises AttributeError on the first pass.
        print diff.next(),

 except:
     # The bare except swallows that error, so the script prints nothing and reports no failure.
     pass

我需要生成这两个新文件之间的差异，以便查看它们有哪些不同。有什么建议吗？当我运行它时，没有收到任何错误，但也没有任何输出。

score 1 · Accepted Answer

编辑：鉴于您之前的问题，您似乎应该已经知道出了什么问题。

首先，您需要使用正确数量的括号。

# The parentheses now balance, but ndiff still receives the tuples
# ('file1', 'rb') and ('file2', 'rb') instead of the lines of the two files.
diff = difflib.ndiff(('file1', 'rb'), ('file2', 'rb'))

但这仍然是不正确的，因为difflib.ndiff需要两个字符串列表，而不是未打开文件的名称和模式。您需要将文件的内容读入行列表。

# Read both files into lists of lines. The with-blocks close each handle
# as soon as its content has been read, instead of leaking it.
with open('file1', 'rb') as handle:
    a = handle.read().splitlines()
with open('file2', 'rb') as handle:
    b = handle.read().splitlines()

# difflib.ndiff takes two sequences of lines and yields the delta line by line.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
for diff in difflib.ndiff(a, b):
    print(diff)

python - csv文件比较差异

1 回答 1

Related

Reference