我尝试导入 csv 并处理错误值,例如错误的十进制分隔符或 int/double 列中的字符串。我使用转换器来修复错误。对于数字列中的字符串,用户会看到一个输入框,他必须在其中修复值。是否可以获得实际“导入”的列名和/或行?如果没有,有没有更好的方法来做同样的事情?
example csv:
------------
description;elevation
point a;-10
point b;10,0
point c;35.5
point d;30x
from PyQt4 import QtGui
import numpy
from pandas import read_csv
def fixFloat(x):
# return x as float if possible
try:
return float(x)
except:
# if not, test if there is a , inside, replace it with a . and return it as float
try:
return float(x.replace(",", "."))
except:
changedValue, ok = QtGui.QInputDialog.getText(None, 'Fehlerhafter Wert', 'Bitte korrigieren sie den fehlerhaften Wert:', text=x)
if ok:
return self.fixFloat(changedValue)
else:
return -9999999999
def fixEmptyStrings(s):
if s == '':
return None
else:
return s
converters = {
'description': fixEmptyStrings,
'elevation': fixFloat
}
dtypes = {
'description': object,
'elevation': numpy.float64
}
csvData = read_csv('/tmp/csv.txt',
error_bad_lines=True,
dtype=dtypes,
converters=converters
)