python - 格式化字符串未使用的命名参数

Question

假设我有：

action = '{bond}, {james} {bond}'.format(bond='bond', james='james')

这将输出：

'bond, james bond'

接下来我们有：

 action = '{bond}, {james} {bond}'.format(bond='bond')

这将输出：

KeyError: 'james'

是否有一些解决方法可以防止发生此错误，例如：

如果出现 KeyError：忽略它，保持该占位符原样（但仍解析其他字段）
将格式字符串与可用的命名参数进行比较，如果缺少则添加

score 109 · Accepted Answer

如果您使用的是 Python 3.2+，则可以使用str.format_map()。

对于bond, bond：

>>> from collections import defaultdict
>>> '{bond}, {james} {bond}'.format_map(defaultdict(str, bond='bond'))
'bond,  bond'

对于bond, {james} bond：

>>> class SafeDict(dict):
...     def __missing__(self, key):
...         return '{' + key + '}'
...
>>> '{bond}, {james} {bond}'.format_map(SafeDict(bond='bond'))
'bond, {james} bond'

在 Python 2.6/2.7 中

对于bond, bond：

>>> from collections import defaultdict
>>> import string
>>> string.Formatter().vformat('{bond}, {james} {bond}', (), defaultdict(str, bond='bond'))
'bond,  bond'

对于bond, {james} bond：

>>> from collections import defaultdict
>>> import string
>>>
>>> class SafeDict(dict):
...     def __missing__(self, key):
...         return '{' + key + '}'
...
>>> string.Formatter().vformat('{bond}, {james} {bond}', (), SafeDict(bond='bond'))
'bond, {james} bond'

score 28 · Accepted Answer

您可以使用模板字符串（string.Template）的 safe_substitute 方法。

from string import Template

# string.Template uses $-prefixed placeholders ($bond, $james) rather than
# the {name} fields of str.format().
tpl = Template('$bond, $james $bond')
# safe_substitute() fills in the placeholders it has values for and leaves
# the others ($james here) in the result instead of raising KeyError.
action = tpl.safe_substitute({'bond': 'bond'})

score 15 · Accepted Answer

您可以遵循 PEP 3101 中的建议，子类化 string.Formatter：

from __future__ import print_function
import string

class MyFormatter(string.Formatter):
    """Formatter that renders a marker for missing named fields.

    Named fields that are absent from the keyword arguments are replaced by
    ``default`` formatted with the field name, instead of raising KeyError.
    Positional fields are resolved by the standard ``string.Formatter``
    lookup.

    Args:
        default: A ``str.format`` template receiving the missing field name
            as positional argument 0. The default, ``'{{{0}}}'``, reproduces
            the original ``{name}`` placeholder.
    """

    def __init__(self, default='{{{0}}}'):
        self.default=default

    def get_value(self, key, args, kwds):
        """Return the value for *key*, or the marker if a named key is missing.

        Args:
            key: Field key; a string for named fields, otherwise a
                positional index.
            args: Positional arguments given to ``format``/``vformat``.
            kwds: Keyword arguments given to ``format``/``vformat``.

        Returns:
            The looked-up value, or ``self.default`` formatted with *key*
            when a named key is not present in *kwds*.
        """
        if isinstance(key, str):
            return kwds.get(key, self.default.format(key))
        # The base implementation is called unbound, so `self` must be passed
        # explicitly; omitting it made every positional field raise TypeError.
        return string.Formatter.get_value(self, key, args, kwds)

现在试试：

>>> fmt=MyFormatter()
>>> fmt.format("{bond}, {james} {bond}", bond='bond', james='james')
'bond, james bond'
>>> fmt.format("{bond}, {james} {bond}", bond='bond')
'bond, {james} bond'

您可以通过修改 self.default 的文本，来更改缺失键（KeyError）在结果中的标记方式：

>>> fmt=MyFormatter('">>{{{0}}} KeyError<<"')
>>> fmt.format("{bond}, {james} {bond}", bond='bond', james='james')
'bond, james bond'
>>> fmt.format("{bond}, {james} {bond}", bond='bond')
'bond, ">>{james} KeyError<<" bond'

代码在 Python 2.6、2.7 和 3.0+ 上保持不变

score 10 · Accepted Answer

也可以做简单易读的，虽然有点傻：

'{bar}, {fro} {bar}'.format(bar='bar', fro='{fro}')

我知道这个答案需要了解预期的键，但我正在寻找一个简单的两步替换（首先说问题名称，然后是循环中的问题索引）并且创建一个完整的类或不可读的代码比需要的更复杂。

score 9 · Accepted Answer

falsetru 的答案巧妙地将默认字典与 vformat() 结合使用，而 dawg 的答案可能更符合 Python 的文档，但两者都不能处理复合字段名称（例如带有显式转换（!r）或格式规范（:+10g）的字段）。

例如，使用 falsetru 的 SafeDict：

>>> string.Formatter().vformat('{one} {one:x} {one:10f} {two!r} {two[0]}', (), SafeDict(one=215, two=['James', 'Bond']))
"215 d7 215.000000 ['James', 'Bond'] James"
>>> string.Formatter().vformat('{one} {one:x} {one:10f} {two!r} {two[0]}', (), SafeDict(one=215))
"215 d7 215.000000 '{two}' {"

并使用 dawg 的 MyFormatter：

>>> MyFormatter().format('{one} {one:x} {one:10f} {two!r} {two[0]}', one=215, two=['James', 'Bond'])
"215 d7 215.000000 ['James', 'Bond'] James"
>>> MyFormatter().format('{one} {one:x} {one:10f} {two!r} {two[0]}', one=215)
"215 d7 215.000000 '{two}' {"

在第二种情况下两者都不能很好地工作，因为值查找（在 get_value() 中）已经剥离了格式规范。相反，您可以重新定义 vformat() 或 parse()，使这些规范可用。我下面的解决方案通过重新定义 vformat() 来实现这一点：它先执行键查找，如果缺少键，则用双花括号（例如 {{two!r}}）转义格式字符串中对应的字段，然后再执行正常的 vformat()。

class SafeFormatter(string.Formatter):
    """Formatter that leaves fields with unknown names untouched.

    Before delegating to the standard ``vformat``, the format string is
    rebuilt so that every replacement field whose leading name is not a
    keyword argument is brace-escaped. Such fields therefore come out
    verbatim, including their conversion and format spec.
    """

    def vformat(self, format_string, args, kwargs):
        """Format *format_string*, preserving fields that cannot be resolved.

        A field counts as resolvable when its leading name is empty
        (implicit positional), all digits (explicit positional) or a key of
        *kwargs*. Positional fields are always passed through to the
        standard formatter unchanged.
        """
        pieces = []
        for literal, field, fmt_spec, conversion in self.parse(format_string):
            # parse() collapses '{{' and '}}' to single braces; double them
            # again so the rebuilt string parses to the same literal text.
            pieces.append(literal.replace('{', '{{').replace('}', '}}'))
            if field is None:
                # Trailing literal text with no replacement field after it.
                continue
            suffix = ''
            if conversion:
                suffix += '!' + conversion
            if fmt_spec:
                suffix += ':' + fmt_spec
            # Strip any indexing ([..]) and attribute access (.x) to get the
            # name that is actually looked up in args/kwargs.
            root = field.partition('[')[0].partition('.')[0]
            resolvable = root == '' or root.isdigit() or root in kwargs
            if resolvable:
                opener, closer = '{', '}'
            else:
                # Escaped braces make the field render as literal text.
                opener, closer = '{{', '}}'
            pieces.append(opener + field + suffix + closer)
        rebuilt = ''.join(pieces)
        return string.Formatter.vformat(self, rebuilt, args, kwargs)

这是它的实际应用：

>>> SafeFormatter().format('{one} {one:x} {one:10f} {two!r} {two[0]}', one=215, two=['James', 'Bond'])
"215 d7 215.000000 ['James', 'Bond'] James"
>>> SafeFormatter().format('{one} {one:x} {one:10f} {two!r} {two[0]}', one=215)
'215 d7 215.000000 {two!r} {two[0]}'
>>> SafeFormatter().format('{one} {one:x} {one:10f} {two!r} {two[0]}')
'{one} {one:x} {one:10f} {two!r} {two[0]}'
>>> SafeFormatter().format('{one} {one:x} {one:10f} {two!r} {two[0]}', two=['James', 'Bond'])
"{one} {one:x} {one:10f} ['James', 'Bond'] James"

这个解决方案有点取巧（hacky）（也许重新定义 parse() 会少一些这类权宜之计），但应该适用于更多的格式化字符串。

score 3 · Accepted Answer

在逐步填充格式字符串（例如，对于 SQL 查询）时，需要部分填充格式字符串是一个常见问题。

format_partial() 方法使用 string 模块中的 Formatter 以及 ast 来解析格式字符串，并判断命名参数字典中是否包含对该格式部分求值所需的全部值：

import ast
from collections import defaultdict
from itertools import chain, ifilter, imap
from operator import itemgetter
import re
from string import Formatter

def format_partial(fstr, **kwargs):
    """Substitute only those fields of *fstr* that *kwargs* can resolve.

    Each named replacement field is handled independently. If every name it
    references (including names nested inside its format spec) is a key of
    *kwargs*, the field is replaced by its formatted value; otherwise it is
    left in the string so a later call can fill it in. Fields with an empty
    name (``{}``) are never substituted.

    Args:
        fstr: A ``str.format``-style format string.
        **kwargs: The values currently available.

    Returns:
        The format string with the resolvable fields substituted.
    """
    def can_resolve(expr):
        # A field name such as 'y[1]' or 'z.year' parses as a Python
        # expression; every bare identifier in it must be a supplied value.
        return all(
            value in kwargs
            for node in ast.walk(ast.parse(expr))
            for attr, value in ast.iter_fields(node)
            if attr == 'id'
        )

    ostr = fstr
    for _, name, spec, conv in Formatter().parse(fstr):
        if not name:
            # Literal-only segment (None) or anonymous field ('').
            continue
        # Rebuild the field exactly as written, including the conversion,
        # so that it can be located in the string again.
        f = ('{' + name
             + ('!' + conv if conv else '')
             + (':' + spec if spec else '')
             + '}')
        # Dry run with a formatter whose lookup records every field name it
        # is asked for (nested ones included) and returns a dummy 0.
        seen = defaultdict(int)
        probe = Formatter()
        probe.get_field = (
            lambda field_name, args, kwargs, seen=seen:
            (seen[field_name], field_name)
        )
        probe.format(f, **kwargs)
        if all(can_resolve(e) for e in seen):
            # Plain string replacement: the value must not be treated as a
            # regex replacement template, where backslashes are special.
            ostr = ostr.replace(f, Formatter().format(f, **kwargs), 1)
    return ostr

format_partial将保留格式字符串的未解析部分，因此可以在数据可用时使用后续调用来解析这些部分。

goodmami 和 dawg 的答案似乎更清晰，但它们都未能完整覆盖格式迷你语言，例如 {x:>{x}}；而 format_partial 可以毫无问题地处理 string.format() 能够解析的任何格式字符串：

from datetime import date
format_partial('{x} {} {y[1]:x} {x:>{x}} {z.year}', **{'x':30, 'y':[1,2], 'z':date.today()})

'30 {} 2                             30 2016'

使用正则表达式而不是字符串格式化程序将功能扩展到旧式格式字符串甚至更容易，因为旧式格式子字符串是常规的（即没有嵌套标记）。

score 2 · Accepted Answer

这是使用 python27 的另一种方法：

# NOTE: per the surrounding text this snippet targets Python 2.7; it relies
# on the private str._formatter_parser() method and on the print statement.
action = '{bond}, {james} {bond}'
# Seed a dict with an empty-string default for every field name that the
# parser reports for the template (x[1] is the field name).
d = dict((x[1], '') for x in action._formatter_parser())
# Now we have: `d = {'james': '', 'bond': ''}`.
# Overlay the values that are actually known; the rest stay ''.
d.update(bond='bond')
print action.format(**d)  # bond,  bond

score 1 · Accepted Answer

对于 Python 3，采用批准的答案，这是一个不错的、紧凑的 Pythonic 实现：

def safeformat(str, **kwargs):
    """Format *str* with *kwargs*, keeping unknown ``{name}`` fields.

    Fields whose name is not among *kwargs* are rendered as the literal
    text ``{name}``; keyword arguments that the template does not use are
    ignored.
    """
    class _EchoMissing(dict):
        # str.format_map() looks fields up with plain indexing, so a dict
        # subclass can supply a value for absent keys via __missing__.
        def __missing__(self, name):
            return ''.join(('{', name, '}'))

    return str.format_map(_EchoMissing(kwargs))

# In [1]: safeformat("a: {a}, b: {b}, c: {c}", a="A", c="C", d="D")
# Out[1]: 'a: A, b: {b}, c: C'

score 1 · Accepted Answer

基于其他一些答案，我扩展了解决方案。这将处理带有格式规范的字符串"{a:<10}"。

我发现 selenium 日志记录中的一些字符串导致 vformat（和 format_map）达到递归限制。我还想确保我可以处理也存在空花括号的字符串。

def partialformat(s: str, recursionlimit: int = 10, **kwargs):
    """Format *s* with *kwargs*, leaving unresolvable fields in place.

    ``string.Formatter.vformat`` does the actual work of formatting but
    offers no way to choose how deeply nested replacement fields may be;
    the private ``_vformat`` does, so it is called directly with
    *recursionlimit*. This allows strings with many levels of embedded
    curly braces (for example JSON-like text) to be processed.

    A single positional argument ``"{}"`` is passed so that the first empty
    brace pair in *s* is reproduced as ``{}``.

    Fields that cannot be resolved (missing key, missing attribute, failed
    index) are written back as ``{field!conversion:spec}``.

    Args:
        s: The format string.
        recursionlimit: Maximum nesting depth of replacement fields.
        **kwargs: Values available for substitution.

    Returns:
        The partially formatted string.
    """

    class FormatPlaceholder:
        """Stand-in for a missing value that re-renders its own field."""

        def __init__(self, key, conversion=None):
            self.key = key
            self.conversion = conversion

        def __format__(self, spec):
            result = self.key
            if self.conversion:
                result += "!" + self.conversion
            if spec:
                result += ":" + spec
            return "{" + result + "}"

    class FormatDict(dict):
        """Mapping that yields a placeholder for every missing key."""

        def __missing__(self, key):
            return FormatPlaceholder(key)

    class PartialFormatter(string.Formatter):
        """Formatter that turns failed field lookups into placeholders."""

        def get_field(self, field_name, args, kwargs):
            try:
                return super().get_field(field_name, args, kwargs)
            except (IndexError, KeyError, AttributeError, TypeError):
                # The lookup failed somewhere along the attribute/index
                # chain: keep the whole field text. TypeError covers
                # indexing into a placeholder or a non-subscriptable value.
                first = field_name.partition('.')[0].partition('[')[0]
                return FormatPlaceholder(field_name), first

        def convert_field(self, value, conversion):
            if isinstance(value, FormatPlaceholder):
                # Remember the conversion rather than applying repr()/str()
                # to the placeholder object itself.
                if conversion is None:
                    return value
                return FormatPlaceholder(value.key, conversion)
            return super().convert_field(value, conversion)

    fmttr = PartialFormatter()
    fs, _ = fmttr._vformat(s, ("{}",), FormatDict(**kwargs), set(), recursionlimit)
    return fs

# Demo object: it defines `blue` but no `pink`, so the template below
# contains both an existing and a missing attribute on a supplied value.
class ColorObj(object):
    blue = "^BLUE^"
# JSON-like template with four levels of nested braces, an empty `{}`, fields
# with a format spec and a conversion, and one name (`blah`) that is not
# passed to partialformat() at all.
s = '{"a": {"b": {"c": {"d" : {} {foo:<12} & {foo!r} {arg} {color.blue:<10} {color.pink} {blah.atr} }}}}'
print(partialformat(s, foo="Fooolery", arg="ARRrrrrrg!", color=ColorObj))

输出：

{"a": {"b": {"c": {"d" : {} Fooolery             & 'Fooolery' Fooolery ARRrrrrrg! ^BLUE^ {color.pink} {blah.atr} }}}}

python - 格式化字符串未使用的命名参数

9 回答 9

如果您使用的是 Python 3.2+，则可以使用str.format_map()。

在 Python 2.6/2.7 中

Related

Reference