干得好。为了让事情更简单,我最终没有使用正则表达式。事实上,就我目前看到的文件样本而言,其结构还没有复杂到值得使用正则表达式的程度。如果文件的其他部分具有更复杂的结构,那么使用正则表达式可能会更有价值。
我也不确定您使用的是 Python 3 还是 Python 2,所以我尝试以一种可以同时使用两者的方式编写它:
from collections import defaultdict
class ParseLoadsError(Exception):
    """Exception raised for malformatted load files.

    The constructor arguments are kept in ``args`` (in the order
    ``lineno, line, message``) and are also exposed as the attributes
    ``lineno``, ``line`` and ``message`` so that callers do not have to
    index ``args`` to find out where parsing failed.
    """

    def __init__(self, lineno, line, message):
        # Two-argument super() keeps this working on Python 2 as well as 3.
        super(ParseLoadsError, self).__init__(lineno, line, message)
        self.lineno = lineno
        self.line = line
        self.message = message

    def __str__(self):
        # The offending line is shown with repr() so that stray whitespace
        # or an empty line is visible in the message.
        return 'parse error on line {}: {!r}; {}'.format(*self.args)
def parse_loads_file(fileobj):
    """Parse a <whatever> file.

    Currently just returns non-uniform loads.  Parsing other
    file sections is left as an exercise.

    ``fileobj`` is iterated line by line, so an open text file, a
    ``StringIO`` or a plain list of strings all work.

    Returns a dict whose ``'non_uniform_loads'`` entry is a list holding
    the parsed content of every ``[NON-UNIFORM LOADS]`` section, in the
    order the sections appear.
    """
    result = {'non_uniform_loads': []}

    # Lines are numbered from 1 so that the line numbers handed to the
    # section parser (and from there into error reports) match what a text
    # editor shows.  Each line is stripped exactly once, here.
    line_iterator = (
        (lineno, raw.strip()) for lineno, raw in enumerate(fileobj, 1)
    )

    for _, line in line_iterator:
        if line == '[NON-UNIFORM LOADS]':
            # Read the entire [NON-UNIFORM LOADS] section.  The section
            # parser receives this very iterator, so it consumes the
            # section's lines and the loop resumes right after them.
            result['non_uniform_loads'].append(
                _parse_non_uniform_loads(line_iterator))

    return result
def _parse_variable_map(lineno, line):
"""Parse a single <values> = <varnames> mapping.
This file format uses a format for mapping one or more values
to one or more variable names in the format::
N_1 N_2 N_3 ... N_n = A_1, A_2, A_33, ..., A_n
Where N_i are always either integers or floating-point values, and
A_i is the variable name associated with A_i. The A_i may contain
spaces, but whitespace is otherwise irrelevant.
Of course, if other types of values may occur in other sections of
the file this may be slightly more complicated. This also assumes
these lines are always well-formed. If not, additional logic may be
required to handle misshapen variables maps.
"""
try:
values, varnames = line.split('=')
values = (float(v.strip()) for v in values.split())
varnames = (n.strip() for n in varnames.split(','))
return dict(zip(varnames, values))
except ValueError:
raise
raise ParseLoadsError(lineno, line,
"expected format N_1 N_2 ... N_n = A_1, A_2, ..., A_n")
def _parse_non_uniform_loads(lines):
    """Parse the body of one [NON-UNIFORM LOADS] section.

    Args:
        lines: Iterator of ``(lineno, line)`` pairs positioned on the first
            line after the ``[NON-UNIFORM LOADS]`` header.  It is advanced
            past the closing ``[END OF NON-UNIFORM LOADS]`` line.

    Returns:
        A dict mapping each load name to its load data.

    Raises:
        ParseLoadsError: If the item count line or the closing marker is
            missing or malformed, or if the input ends inside the section.
            In the latter case the reported line is the last one this
            function itself read (``None`` if there was none).
    """
    lineno, line = None, ''
    try:
        # The first line of a non-uniform loads section
        # describes the number of loads
        lineno, line = next(lines)
        try:
            n_loads = int(
                _parse_variable_map(lineno, line)['number of items'])
        except KeyError:
            raise ParseLoadsError(
                lineno, line, "expected 'N = number of items'")

        # _parse_load returns a (load_name, load_data) tuple.  A plain loop
        # is used rather than a generator expression: a StopIteration
        # escaping from inside a generator is either turned into
        # RuntimeError (PEP 479) or silently ends the generator early.
        loads = {}
        for _ in range(n_loads):
            load_name, load_data = _parse_load(lines)
            loads[load_name] = load_data

        lineno, line = next(lines)
    except StopIteration:
        # The line iterator ran dry before the section was complete.
        raise ParseLoadsError(
            lineno, line,
            "unexpected end of input before '[END OF NON-UNIFORM LOADS]'")

    if line != '[END OF NON-UNIFORM LOADS]':
        raise ParseLoadsError(
            lineno, line, "expected '[END OF NON-UNIFORM LOADS]'")
    return loads
def _parse_load(lines):
    """Parses a single load section.

    A load consists of a name line, one line of metadata in
    ``<values> = <varnames>`` form, a ``N = Number of co-ordinates`` line
    and then N coordinate lines, each again in ``<values> = <varnames>``
    form.

    Args:
        lines: Iterator of ``(lineno, line)`` pairs positioned on the
            load's name line.

    Returns:
        A ``(load_name, load_data)`` tuple.  ``load_data`` holds the
        metadata values plus a ``'Coordinates'`` entry mapping each
        coordinate variable name to the list of its values, in file order.

    Raises:
        ParseLoadsError: If a line is malformed or the input ends before
            the load is complete; in the latter case the reported line is
            the last one that could be read (``None`` if there was none).
    """
    lineno, line = None, ''
    try:
        lineno, line = next(lines)
        load_name = line

        # Next there appears some additional metadata about the load
        lineno, line = next(lines)
        load_data = _parse_variable_map(lineno, line)

        # Then the number of coordinates
        lineno, line = next(lines)
        try:
            n_coords = int(
                _parse_variable_map(lineno, line)['Number of co-ordinates'])
        except KeyError:
            raise ParseLoadsError(
                lineno, line, "expected 'N = Number of co-ordinates'")

        # Collect the coordinate lines column-wise: one list per variable.
        coordinates = defaultdict(list)
        for _ in range(n_coords):
            lineno, line = next(lines)
            for c, v in _parse_variable_map(lineno, line).items():
                coordinates[c].append(v)
    except StopIteration:
        # Report truncated input as a parse error rather than letting
        # StopIteration leak into the caller.
        raise ParseLoadsError(
            lineno, line,
            "unexpected end of input; load definition is incomplete")

    load_data['Coordinates'] = dict(coordinates)
    return load_name, load_data
示例用法:
try:
from cStringIO import StringIO
except ImportError:
from io import StringIO
# In-memory sample input: one [NON-UNIFORM LOADS] section holding three
# loads, with placeholder lines before and after it standing in for the
# rest of the file.
example_file = StringIO("""...previous file content
[NON-UNIFORM LOADS]
3 = number of items
Load 1
0 17.50 20.00 0 0 = Time, Gamma dry, Gamma wet, Temporary, Endtime
6 = Number of co-ordinates
0.000 0.000 = X, Y
20.000 0.000 = X, Y
40.000 2.000 = X, Y
80.000 2.000 = X, Y
100.000 0.000 = X, Y
120.000 0.000 = X, Y
Compensation load
200 17.50 20.00 0 0 = Time, Gamma dry, Gamma wet, Temporary, Endtime
19 = Number of co-ordinates
20.000 0.000 = X, Y
20.000 1.198 = X, Y
25.000 2.763 = X, Y
30.000 3.785 = X, Y
35.000 4.617 = X, Y
40.000 5.324 = X, Y
45.000 5.418 = X, Y
50.000 5.454 = X, Y
55.000 5.467 = X, Y
60.000 5.471 = X, Y
65.000 5.467 = X, Y
70.000 5.454 = X, Y
75.000 5.418 = X, Y
80.000 5.324 = X, Y
85.000 4.617 = X, Y
90.000 3.785 = X, Y
95.000 2.763 = X, Y
100.000 1.198 = X, Y
100.000 0.000 = X, Y
Compensation load 2
200 17.50 20.00 0 0 = Time, Gamma dry, Gamma wet, Temporary, Endtime
3 = Number of co-ordinates
0.000 0.000 = X, Y
20.000 10.000 = X, Y
20.000 0.000 = X, Y
[END OF NON-UNIFORM LOADS]
... subsequent file content""")
# To use an actual file here you might do something like
#     with open(filename) as fobj:
#         parse_loads_file(fobj)
#
# NOTE(review): the return value is not stored or printed here; the output
# shown after this example presumably comes from an interactive session.
parse_loads_file(example_file)
输出:
{'non_uniform_loads': [{'Compensation load': {'Coordinates': {'X': [20.0,
20.0,
25.0,
30.0,
35.0,
40.0,
45.0,
50.0,
55.0,
60.0,
65.0,
70.0,
75.0,
80.0,
85.0,
90.0,
95.0,
100.0,
100.0],
'Y': [0.0,
1.198,
2.763,
3.785,
4.617,
5.324,
5.418,
5.454,
5.467,
5.471,
5.467,
5.454,
5.418,
5.324,
4.617,
3.785,
2.763,
1.198,
0.0]},
'Endtime': 0.0,
'Gamma dry': 17.5,
'Gamma wet': 20.0,
'Temporary': 0.0,
'Time': 200.0},
'Compensation load 2': {'Coordinates': {'X': [0.0, 20.0, 20.0],
'Y': [0.0, 10.0, 0.0]},
'Endtime': 0.0,
'Gamma dry': 17.5,
'Gamma wet': 20.0,
'Temporary': 0.0,
'Time': 200.0},
'Load 1': {'Coordinates': {'X': [0.0, 20.0, 40.0, 80.0, 100.0, 120.0],
'Y': [0.0, 0.0, 2.0, 2.0, 0.0, 0.0]},
'Endtime': 0.0,
'Gamma dry': 17.5,
'Gamma wet': 20.0,
'Temporary': 0.0,
'Time': 0.0}}]}
我不确定单个文件是否可以包含多个 [NON-UNIFORM LOADS] 部分,因此我把每个此类部分的内容都追加到一个列表({'non_uniform_loads': []})中。但是,如果文件中只会有一个这样的部分,那么您可以去掉列表,直接设置 result['non_uniform_loads'] = _parse_non_uniform_loads(line_iterator)。