前段时间我写了下面这段代码,它实现的是 str.format 的逆操作,但只覆盖了我当时需要的那些情况。
另外,我自己没有试过,但我认为 parse 库的用途也正是如此。
我的代码:
import string
import re
# Regular-expression fragments used for each supported field type.
_def_re = '.+'                  # untyped field: any non-empty text
_int_re = r'[-+]?[0-9]+'        # 'd' field: optionally signed integer
_float_re = r'[-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?'  # 'f' field
# Characters that are special in a regular expression.  format_parse itself
# relies on re.escape; this constant is retained for any other code that may
# reference it.
_spec_char = r'[]\^$.|?*+(){}'

# `basestring` only exists on Python 2; fall back to `str` on Python 3.
try:
    _string_types = (basestring,)
except NameError:
    _string_types = (str,)


def format_parse(text, pattern):
    """
    Scan `text` using the str.format-style `pattern` (the reverse of format).

    The pattern is translated into an anchored regular expression: literal
    text is escaped and every replacement field becomes a named group.

    :param text: a string, or an iterable of strings.
    :param pattern: a format string such as ``'the {name} is {value:f}'``.
    :return: for a string, a dict mapping each field name to its parsed
        value, or None when `text` does not match.  For any other iterable,
        a list with one such dict-or-None per element.  Unnamed fields
        (``{}``) are keyed by the integers 0, 1, 2, ... in order of
        appearance.
    :raises re.error: when a field name is repeated (see limitations).

    Limitations:

    - a field name cannot repeat (even implicit ones, e.g. ``'{}_{0}'``);
      a repeated name yields a duplicate regex group name
    - alignment, width and conversion flags (``!r``) are ignored
    - only the following field types are recognized:

      * ``d``: looks for and returns an integer (optional sign)
      * ``f``: looks for and returns a float

      any other field is returned as the matched string

    Examples::

        res = format_parse('the depth is -42.13', 'the {name} is {value:f}')
        # res == {'name': 'depth', 'value': -42.13}
        'the {name} is {value:f}'.format(**res)
        # 'the depth is -42.130000'

        # Ex2: unnamed fields, with an invalid item (2nd)
        versions = ['Version 1.4.0', 'Version 3,1,6', 'Version 0.1.0']
        v = format_parse(versions, 'Version {:d}.{:d}.{:d}')
        # v == [{0: 1, 1: 4, 2: 0}, None, {0: 0, 1: 1, 2: 0}]
    """
    next_auto = 0    # next integer key for an unnamed field
    group_of = {}    # field name -> regex group name
    converters = {}  # field name -> conversion callable (int or float)
    parts = ['^']    # pieces of the regular expression being built

    for literal, field, spec, _conv in string.Formatter().parse(pattern):
        # Literal text must match verbatim, so escape every regex
        # metacharacter (including braces coming from '{{' / '}}').
        parts.append(re.escape(literal))

        # Formatter.parse reports field=None for literal-only chunks, i.e.
        # trailing text and the chunks produced by escaped braces.
        if field is None:
            continue

        if field == '':
            field = next_auto
            next_auto += 1

        # Group names must be valid identifiers, hence the '_<n>' aliases.
        group = group_of.setdefault(field, '_%d' % len(group_of))

        if 'd' in spec:
            field_re = _int_re
            converters[field] = int
        elif 'f' in spec:
            field_re = _float_re
            converters[field] = float
        else:
            field_re = _def_re

        parts.append('(?P<%s>%s)' % (group, field_re))

    parts.append('$')
    regex = re.compile(''.join(parts))

    def map_result(match):
        """Turn a match object into a {field name: converted value} dict."""
        if match is None:
            return None
        found = match.groupdict()
        result = {}
        for name, group in group_of.items():
            value = found[group]
            convert = converters.get(name)
            result[name] = value if convert is None else convert(value)
        return result

    if isinstance(text, _string_types):
        return map_result(regex.search(text))
    # Build a real list so the result is the same on Python 2 and 3.
    return [map_result(regex.search(item)) for item in text]
对于您的情况,这是一个使用示例:
# Parse several version strings with a pattern of three unnamed integers.
versions = ['Version 1.4.0', 'Version 3.1.6', 'Version 0.1.0']
v = format_parse(versions, 'Version {:d}.{:d}.{:d}')
# v == [{0: 1, 1: 4, 2: 0}, {0: 3, 1: 1, 2: 6}, {0: 0, 1: 1, 2: 0}]

# Convert every parsed dict into a list of its three integers;
# filter(None, v) drops falsy entries such as the None of an unparsable string.
v = [[parsed[idx] for idx in (0, 1, 2)] for parsed in filter(None, v)]
注意:`filter(None, v)` 会去掉无法解析的版本(这些条目的返回值为 None)。
在本例中所有版本都能解析,因此这一步并非必需。