4

我有一个列表:

l=['a','>>','b','>>','d','e','f','g','>>','i','>>','>>','j','k','l','>>','>>']

我需要提取 '>>' 的所有邻居,并将它们分成若干组:组与组之间由既不是 '>>'、也不是 '>>' 的邻居的元素隔开。

对于示例列表,预期结果将是:

[['a', 'b', 'd'], ['g', 'i', 'j'], ['l']]

我已经尝试了很多事情,但是所有简单的事情都以某种方式失败了。目前唯一可行的代码是:

def func(L, N):
    """Group the elements of L that sit directly next to the marker N.

    An element is kept when it is not the marker itself and its left or
    right neighbour equals N. Kept elements stay in the same group until an
    element that is neither the marker nor next to a marker is met; that
    element closes the current group.

    Args:
        L: Indexable sequence to scan.
        N: Marker value, compared with ``==``.

    Returns:
        A list of non-empty lists, in the order of appearance in L.
    """
    outer = []
    inner = []
    last = len(L) - 1
    for i, e in enumerate(L):
        if e == N:
            continue
        # Explicit bounds checks: L[i - 1] at i == 0 would silently wrap
        # around to the last element, and L[i + 1] past the end would raise.
        next_to_marker = (
            (i > 0 and L[i - 1] == N) or (i < last and L[i + 1] == N)
        )
        if next_to_marker:
            inner.append(e)
        elif inner:
            outer.append(inner)
            inner = []
    if inner:
        outer.append(inner)
    return outer

func(l,'>>')

Out[196]:
[['a', 'b', 'd'], ['g', 'i', 'j'], ['l']]

虽然它似乎有效,但我想知道是否有更好、更清洁的方法来做到这一点?

4

6 回答 6

2

这是另一种选择:

import itertools

def func(L, N):
    """Group the elements of L that sit directly next to the marker N.

    Consecutive entries that are the marker or touch a marker are bundled
    with ``itertools.groupby``; the markers are then dropped from each
    bundle and bundles left empty are discarded.

    Args:
        L: Indexable sequence to scan.
        N: Marker value, compared with ``==``.

    Returns:
        A list of non-empty lists, in the order of appearance in L.
    """
    last = len(L) - 1

    def key(i_e):
        # True for the marker itself and for anything adjacent to it.
        # ``i < last`` (not ``i < len(L)``) keeps L[i+1] inside the list.
        i, e = i_e
        return e == N or (i > 0 and L[i-1] == N) or (i < last and L[i+1] == N)

    outer = []
    for k, g in itertools.groupby(enumerate(L), key):
        if k:
            group = [e for _, e in g if e != N]
            # A run made only of markers has no neighbours to report.
            if group:
                outer.append(group)
    return outer

或者使用嵌套列表推导式的等效版本:

def func(L, N):
    """Group the elements of L that sit directly next to the marker N.

    Comprehension-based variant: consecutive entries that are the marker or
    touch a marker are bundled with ``itertools.groupby``, the markers are
    dropped from each bundle, and bundles left empty are discarded.

    Args:
        L: Indexable sequence to scan.
        N: Marker value, compared with ``==``.

    Returns:
        A list of non-empty lists, in the order of appearance in L.
    """
    last = len(L) - 1

    def key(i_e):
        # True for the marker itself and for anything adjacent to it.
        # ``i < last`` (not ``i < len(L)``) keeps L[i+1] inside the list.
        i, e = i_e
        return e == N or (i > 0 and L[i-1] == N) or (i < last and L[i+1] == N)

    # Each bundle is materialised as soon as groupby hands it out, before
    # groupby advances to the next one.
    bundles = (
        [e for _, e in g if e != N]
        for k, g in itertools.groupby(enumerate(L), key)
        if k
    )
    return [bundle for bundle in bundles if bundle]
于 2012-10-29T20:54:47.890 回答
2

你可以像这样简化它

# Pad both ends with '' so that every real item has a left and a right
# neighbour and l[i - 1] / l[i + 1] never leave the list.
l = [''] + l + ['']
stack = []  # the resulting groups
connected = last_connected = False
for i, item in enumerate(l):
    # Padding and markers are never part of a group and do not reset it.
    if item not in ('', '>>'):
        connected = '>>' in (l[i - 1], l[i + 1])
        # The first connected item after an unconnected one opens a group.
        if connected and not last_connected:
            stack.append([])
        if connected:
            stack[-1].append(item)
        last_connected = connected
于 2012-10-29T21:01:23.117 回答
2

我认为最 Pythonic 和易于阅读的解决方案是这样的:

import itertools

def neighbours(items, fill=None):
    """Yield the elements with their neighbours as (before, element, after).

    neighbours([1, 2, 3]) --> (None, 1, 2), (1, 2, 3), (2, 3, None)

    Args:
        items: Any iterable, including one-shot iterators and generators.
        fill: Value used as the missing neighbour at both ends.

    Returns:
        An iterator of 3-tuples, one per element of ``items``.
    """
    # tee() provides three independent iterators over the same data, so the
    # input is read only once and one-shot iterables work too.
    before, current, after = itertools.tee(items, 3)
    before = itertools.chain([fill], before)
    after = itertools.chain(after, [fill])
    # Shift "after" one step ahead; this cannot raise because the chain
    # always contains at least the trailing fill value.
    next(after)
    return zip(before, current, after)

def split_not_neighbour(seq, mark):
    """Yield lists of the items that are next to the mark.

    Items equal to the mark are dropped. An item whose (before, item, after)
    triple does not contain the mark closes the group collected so far.

    split_not_neighbour([1, 0, 2, 3, 4, 5, 0], 0) --> [1, 2], [5]

    """
    group = []
    for triple in neighbours(seq):
        if mark not in triple:
            # Neither the mark nor next to it: close the current group.
            if group:
                yield group
                group = []
            continue
        current = triple[1]
        if current != mark:
            group.append(current)
    # Emit whatever is still pending once the input is exhausted.
    if group:
        yield group

我们可以这样使用:

>>> l = ['a', '>>', 'b', '>>', 'd', 'e', 'f', 'g', '>>', 'i', '>>', '>>',
...      'j', 'k', 'l', '>>', '>>']
>>> print(list(split_not_neighbour(l, ">>")))
[['a', 'b', 'd'], ['g', 'i', 'j'], ['l']]

请注意,这里没有使用任何直接索引。

编辑:更优雅的版本。

def split_not_neighbour(seq, mark):
    """Yield lists of the items that are next to the mark.

    Items equal to the mark are dropped. Consecutive (before, item, after)
    triples are grouped by whether they contain the mark; only the groups
    that do contain it are reported, and groups left empty after dropping
    the marks are skipped.

    split_not_neighbour([1, 0, 2, 3, 4, 5, 0], 0) --> [1, 2], [5]

    """
    neighboured = neighbours(seq)
    for away_from_mark, items in itertools.groupby(
            neighboured, key=lambda x: mark not in x):
        # groupby also hands out the runs that do NOT touch the mark; those
        # are the separators between groups, not results.
        if away_from_mark:
            continue
        group = [item for _, item, _ in items if item != mark]
        if group:
            yield group
于 2012-10-29T21:01:22.930 回答
0

我天真的尝试

# Glue the items into one string and cut it at every '>>', so that each
# piece is the run of items found between two markers.
# NOTE(review): joining erases the item boundaries, so this only works when
# every item is a single character (i[0] / i[-1] pick characters).
things = (''.join(l)).split('>>')

output = []
inner = []

for i in things:
    i_len = len(i)
    if i_len == 1:
        # A lone item between markers continues the current group.
        inner.append(i)
    elif i_len > 1:
        # A longer run: its first item ends the current group and its last
        # item starts the next one.
        # NOTE(review): this is applied to every run longer than one item,
        # including a leading or trailing run and a run of exactly two items.
        inner.append(i[0])
        output.append(inner)
        inner = [i[-1]]
    # Empty pieces (adjacent markers, or a marker at either end) are ignored.

# Only keep the pending group if it actually holds something.
if inner:
    output.append(inner)
print(output)  # [['a', 'b', 'd'], ['g', 'i', 'j'], ['l']]
于 2012-10-29T21:08:04.773 回答
0

像这样的东西:

l=['a','>>','b','>>','d','e','f','g','>>','i','>>','>>','j','k','l','>>','>>']
# Glue the items together, cut at every '>>' and drop the empty pieces; each
# remaining string is the run of items found between two markers.
# NOTE(review): joining erases the item boundaries, so this only works when
# every item is a single character.
l = [run for run in "".join(l).split(">>") if run]
lis = []
for x in l:
    # The first item of a run always goes into the current group, which is
    # created on demand for the very first run.
    if not lis:
        lis.append([])
    lis[-1].append(x[0])
    # A run longer than one item also opens the next group with its last
    # item.
    if len(x) > 1:
        lis.append([x[-1]])

print(lis)

输出:

[['a', 'b', 'd'], ['g', 'i', 'j'], ['l']]

或者:

l=['a','>>','b','>>','d','e','f','g','>>','i','>>','>>','j','k','l','>>','>>']
# Glue the items together, cut at every '>>' and drop the empty pieces.
# A real list is built (not a lazy filter object) because it is traversed
# twice below.
# NOTE(review): joining erases the item boundaries, so this only works when
# every item is a single character.
l = [run for run in "".join(l).split(">>") if run]
# Pre-allocate one group more than there are runs longer than one item.
lis = [[] for _ in range(sum(1 for run in l if len(run) > 1) + 1)]
for x in l:
    # The first item of a run joins the last group that already has items,
    # or the first group while all of them are still empty.
    current = next((group for group in reversed(lis) if group), lis[0])
    current.append(x[0])
    if len(x) > 1:
        # The last item of a longer run goes into the first empty group.
        for group in lis:
            if not group:
                group.append(x[-1])
                break

print(lis)

输出:

[['a', 'b', 'd'], ['g', 'i', 'j'], ['l']]
于 2012-10-29T23:58:00.073 回答
0

使用原始列表叠加的另一种方法

import copy

# Overlay the input list with two shifted copies of itself so that every
# item is seen together with its left and right neighbour. '' pads the
# missing neighbour at either end.
lis_dup = [''] + list(lis)         # shifted right: item -> its predecessor
lis_next = list(lis[1:]) + ['']    # shifted left:  item -> its successor

tmp = []   # group currently being collected
res = []   # finished groups

# zip() stops at the shortest input, so the extra tail of lis_dup is ignored.
for cur, before, after in zip(lis, lis_dup, lis_next):
    if cur == '>>':
        # Markers are never part of a group and do not close one either.
        continue
    if '>>' in (before, after):
        tmp.append(cur)
    elif tmp:
        # An item away from any marker closes the current group.
        res.append(tmp)
        tmp = []
# Only keep the pending group if it actually holds something.
if tmp:
    res.append(tmp)

print(res)
于 2013-01-09T11:50:40.603 回答