-2
import sys, subprocess, glob

# Export one table from every '*.res' database in the current directory to a
# CSV file named after the database (e.g. 'run1.res' -> 'run1.csv').
# Relies on the external mdb-schema / mdb-tables / mdb-export commands.

# Name of the only table that gets exported.
TARGET_TABLE = 'Channel_Normal_Table'

mdbfiles = glob.glob('*.res')
for DATABASE in mdbfiles:

    # Output is not captured, so it goes straight to this process's stdout.
    subprocess.call(["mdb-schema", DATABASE, "mysql"])

    # universal_newlines=True makes the pipe return text on both Python 2 and
    # Python 3; without it Python 3 yields bytes, which never compare equal
    # to the str table name below.
    table_names = subprocess.Popen(["mdb-tables", "-1", DATABASE],
                                   stdout=subprocess.PIPE,
                                   universal_newlines=True).communicate()[0]
    tables = table_names.splitlines()

    sys.stdout.flush()

    for table in tables:
        if table != TARGET_TABLE:
            continue

        # glob matched on the '.res' suffix, so strip exactly that suffix
        # (str.replace would also remove '.res' from the middle of a name).
        filename = DATABASE[:-len(".res")] + ".csv"
        print("Dumping " + table)
        contents = subprocess.Popen(["mdb-export", DATABASE, table],
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True).communicate()[0]

        # TODO: sort and extract the required rows from `contents` here.

        # The context manager closes the file even if the write fails.
        with open(filename, 'w') as out:
            out.write(contents)

我有一个从数据库中提取的表。让我们称之为table。我需要执行以下操作,但我有点卡住了:

Cycle Test_Time  Current    Voltage
1     7.80E-002 0.00E+000   1.21E-001
1     3.01E+001 0.00E+000   1.19E-001
1     6.02E+001 0.00E+000   1.17E-001
2     9.02E+001 0.00E+000   1.14E-001
2     1.20E+002 0.00E+000   1.11E-001
2     1.50E+002 0.00E+000   1.08E-001
2     1.80E+002 0.00E+000   1.05E-001
2     2.10E+002 0.00E+000   1.02E-001
3     2.40E+002 0.00E+000   9.93E-002
3     2.70E+002 0.00E+000   9.66E-002
3     3.00E+002 0.00E+000   9.38E-002
3     3.10E+002 4.00E-001   1.26E+000
  1. 提取每个周期的最后(最新)行,或者更高级地,按时间对周期进行排序并提取周期中时间最晚的行。如您所见,由于我们的测试机故障,最后一行并不总是有最新的时间,但通常有。但数字越大时间越晚。
  2. 提取最后五个周期的所有行
  3. 提取周期 4 到周期 30 的所有行。

我尝试了各种方法,例如根据我有限的 Python 知识创建和排序字典和列表,但都没有产生所需的输出。它只是让我发疯。非常感谢!

4

3 回答 3

1

首先,读取文件,并将 Cycle 列的值转换为整数,其余各列的值转换为浮点数:

# Read a whitespace-separated table: the first line is the header, and every
# following line becomes one row with column 0 as int and the rest as float.
databyrow = []
with open('/tmp/temps.txt', 'r') as f:
    header = f.readline().strip().split()
    for line in f:
        fields = line.split()
        if not fields:
            # Skip blank lines so they do not end up as empty rows.
            continue
        temp = []
        for i, val in enumerate(fields):
            fn = int if i == 0 else float
            try:
                val = fn(val)
            except ValueError:
                # Report the problem and keep the raw string in the row.
                # Function-call form of print works on Python 2 and 3.
                print('{0} not converted'.format(val))
            temp.append(val)
        databyrow.append(temp)
print(databyrow)

输出:

 [[1, 0.078, 0.0, 0.121],
 [1, 30.1, 0.0, 0.119],
 [1, 60.2, 0.0, 0.117],
 [2, 90.2, 0.0, 0.114],
 [2, 120.0, 0.0, 0.111],
 [2, 150.0, 0.0, 0.108],
 [2, 180.0, 0.0, 0.105],
 [2, 210.0, 0.0, 0.102],
 [3, 240.0, 0.0, 0.0993],
 [3, 270.0, 0.0, 0.0966],
 [3, 300.0, 0.0, 0.0938],
 [3, 310.0, 0.4, 1.26]]

现在,您可以根据刚刚创建的嵌套列表中的 Cycle 列,创建一个按周期分组的字典:

from itertools import groupby
def keyfn(t):
    """Return the first field of a row (the cycle number in the sample data)."""
    return t[0]


# groupby only merges adjacent items, so order the rows by the key first.
sorted_input = list(databyrow)
sorted_input.sort(key=keyfn)

# Map each key to the list of rows that share it.
data_bycycle = dict(
    (k, list(g)) for k, g in groupby(sorted_input, key=keyfn)
)

输出:

{1: [[1, 0.078, 0.0, 0.121], [1, 30.1, 0.0, 0.119], [1, 60.2, 0.0, 0.117]], 
 2: [[2, 90.2, 0.0, 0.114], [2, 120.0, 0.0, 0.111], [2, 150.0, 0.0, 0.108], [2, 180.0, 0.0, 0.105], [2, 210.0, 0.0, 0.102]], 
 3: [[3, 240.0, 0.0, 0.0993], [3, 270.0, 0.0, 0.0966], [3, 300.0, 0.0, 0.0938], [3, 310.0, 0.4, 1.26]]}

现在您可以直接获取每个循环的最后 N 行:

>>> N=2
>>> data_bycycle[1][-N:]
[[1, 30.1, 0.0, 0.119], [1, 60.2, 0.0, 0.117]]    

如果您想要某一组中时间最晚的那一行(按时间排序后取最后一项):

>>> sorted(data_bycycle[2],key=lambda li: li[1])[-1]
[2, 210.0, 0.0, 0.102]  

编辑

下载您链接的 Dropbox 文件后发现,它是一个 csv 文件——并不是以空格分隔的。

以下是读取这类文件的方法:

import csv

# Read a CSV file: the first record is the header, and every following record
# becomes one row with columns 0 and 1 as int and the rest as float.
databyrow = []
with open('/tmp/VC0307a.csv', 'r') as f:      # potentially you can use 'contents' here
    for i, row in enumerate(csv.reader(f)):
        if i == 0:
            header = row
            continue
        if not row:
            # csv.reader yields [] for blank lines; do not store empty rows.
            continue
        temp = []
        for j, val in enumerate(row):
            fn = int if j in (0, 1) else float
            try:
                val = fn(val)
            except ValueError:
                # Report the problem and keep the raw string in the row.
                # Function-call form of print works on Python 2 and 3.
                print('{0} not converted'.format(val))
            temp.append(val)
        databyrow.append(temp)

将其存储在内存中后,您可以按某个数字列进行排序:

>>> header
['Test_ID', 'Data_Point', 'Test_Time', 'Step_Time', 'DateTime', 'Step_Index', 'Cycle_Index', 'Is_FC_Data', 'Current', 'Voltage', 'Charge_Capacity', 'Discharge_Capacity', 'Charge_Energy', 'Discharge_Energy', 'dV/dt', 'Internal_Resistance', 'AC_Impedance', 'ACI_Phase_Angle']

>>> n=header.index('Test_Time') 
>>> by_time=sorted(databyrow,key=lambda t: t[n])
于 2013-06-18T18:18:16.150 回答
1

您可以使用 pandas 和 pymdb 轻松完成工作。

使用 pandas,您可以轻松处理时间序列数据。看看 pandas.DataFrame。这就是你所需要的。

于 2013-06-18T17:53:23.767 回答
1

这并不难,但您必须逐步完成:

from collections import defaultdict

# Sample data: one header line followed by whitespace-separated rows.
table = """\
Cycle Test_Time  Current    Voltage
1     7.80E-002 0.00E+000   1.21E-001
1     3.01E+001 0.00E+000   1.19E-001
1     6.02E+001 0.00E+000   1.17E-001
2     9.02E+001 0.00E+000   1.14E-001
2     1.20E+002 0.00E+000   1.11E-001
2     1.50E+002 0.00E+000   1.08E-001
2     1.80E+002 0.00E+000   1.05E-001
2     2.10E+002 0.00E+000   1.02E-001
3     2.40E+002 0.00E+000   9.93E-002
3     2.70E+002 0.00E+000   9.66E-002
3     3.00E+002 0.00E+000   9.38E-002
3     3.10E+002 4.00E-001   1.26E+000"""

# Turn the text into a list of rows, each row a list of string fields.
table = [line.split() for line in table.splitlines()]

# The first row holds the column names; map every name to its position.
headers = table.pop(0)
H = dict(zip(headers, range(len(headers))))
time_index = H["Test_Time"]
cycle_index = H["Cycle"]


def _test_time(record):
    """Return the Test_Time field of a row as a float, for sorting."""
    return float(record[time_index])


# Order all rows by time so the last row of each cycle is its latest one.
table.sort(key=_test_time)

# Group the (time-ordered) rows by their integer cycle number.
D = defaultdict(list)
for record in table:
    D[int(record[cycle_index])].append(record)

# Report the three requested selections, tab-separated.
cycles = sorted(D)

print(*headers, sep='\t')
print("Latest row for each cycle")
for number in cycles:
    print(*D[number][-1], sep='\t')

print("All rows for last 5 cycles")
for number in cycles[-5:]:
    for record in D[number]:
        print(*record, sep='\t')

print("All rows for cycles 4 through 30")
for number in cycles:
    if number < 4 or number > 30:
        continue
    for record in D[number]:
        print(*record, sep='\t')
于 2013-06-18T17:36:24.980 回答