python - Tarjan 在 python 中的强连接组件算法不起作用

Question

根据维基百科，我用 Python 实现了 Tarjan 的强连接组件算法，但它不起作用。该算法很短，我找不到任何区别，所以我不知道为什么它不起作用。我试图检查原始文件，但找不到它。

这是代码。

def strongConnect(v):
  global E, idx, CCs, c, S
  idx[v] = (c, c) #idx[v][0] for v.index # idx[v][1] for v.lowlink
  c += 1
  S.append(v)  
  for w in [u for (v2, u) in E if v == v2]:
    if idx[w][0] < 0:
      strongConnect(w)
      # idx[w] = (idx[w][0], min(idx[v][1], idx[w][1])) #fixed, thx
      idx[v] = (idx[v][0], min(idx[v][1], idx[w][1]))
    elif w in S:
      idx[v] = (idx[v][0], min(idx[v][1], idx[w][0]))
  if (idx[v][0] == idx[v][1]):
    i = S.index(v)
    CCs.append(S[i:])
    S = S[:i]

E = [('A', 'B'), ('B', 'C'), ('C', 'D'), ('D', 'E'), ('E', 'A'), ('A', 'E'), ('C', 'A'), ('C', 'E'), ('D', 'F'), ('F', 'B'), ('E', 'F')]
idx = {}
CCs = []
c = 0
S = []
for (u, v) in E:
  idx[u] = (-1, -1)
  idx[v] = (-1, -1)
for v in idx.keys():
  if idx[v][0] < 0:
    strongConnect(v)

print(CCs)

如果您愿意，可以直观地检查图表。如您所见，这是对维基百科中伪代码的非常正向的翻译。但是，这是输出：

[['D', 'E', 'F'], ['B', 'C'], ['A']]

应该只有一个强连通分量，而不是三个。我希望这个问题在各个方面都是正确的，如果不是，我很抱歉。无论如何，非常感谢你。

score 15 · Accepted Answer

好的，我有更多时间考虑这个问题。正如我之前所说，我不再确定过滤边缘是问题所在。事实上，我认为伪代码有歧义；是否for each (v, w) in E意味着每条边（正如字面意思所for each暗示的那样），或者只有每条以 , 开头的边v（如您合理假设的那样）？然后，在 for 循环之后，有问题的是循环v中的最终结果，就像在 Python 中一样？还是回到原来的样子？在这种情况下，伪代码没有明确定义的作用域行为！（如果最后的值是最后一个任意值，那就太奇怪了vforvvv从循环。这表明过滤是正确的，因为在这种情况下，v自始至终都意味着同一件事。）

但是，在任何情况下，您的代码中的明显错误都在这里：

  idx[w] = (idx[w][0], min(idx[v][1], idx[w][1]))

根据伪代码，那肯定是

  idx[v] = (idx[v][0], min(idx[v][1], idx[w][1]))

一旦你做出改变，你就会得到预期的结果。坦率地说，您犯了这个错误并不让我感到惊讶，因为您使用的是一种非常奇怪且违反直觉的数据结构。这就是我认为的改进——它只增加了几行，而且我发现它更具可读性。

import itertools

def strong_connect(vertex):
    global edges, indices, lowlinks, connected_components, index, stack
    indices[vertex] = index
    lowlinks[vertex] = index
    index += 1
    stack.append(vertex)

    for v, w in (e for e in edges if e[0] == vertex):
        if indices[w] < 0:
            strong_connect(w)
            lowlinks[v] = min(lowlinks[v], lowlinks[w])
        elif w in stack:
            lowlinks[v] = min(lowlinks[v], indices[w])

    if indices[vertex] == lowlinks[vertex]:
        connected_components.append([])
        while stack[-1] != vertex:
            connected_components[-1].append(stack.pop())
        connected_components[-1].append(stack.pop())

edges = [('A', 'B'), ('B', 'C'), ('C', 'D'), ('D', 'E'), 
         ('E', 'A'), ('A', 'E'), ('C', 'A'), ('C', 'E'), 
         ('D', 'F'), ('F', 'B'), ('E', 'F')]
vertices = set(v for v in itertools.chain(*edges))
indices = dict((v, -1) for v in vertices)
lowlinks = indices.copy()
connected_components = []

index = 0
stack = []
for v in vertices:
    if indices[v] < 0:
        strong_connect(v)

print(connected_components)

但是，我发现这里使用全局变量令人反感。您可以将其隐藏在它自己的模块中，但我更喜欢创建可调用类的想法。在仔细查看了 Tarjan 的原始伪代码（顺便说一下，它确认了“过滤”版本是正确的）之后，我写了这个。它包括一个简单的Graph类并进行几个基本测试：

from itertools import chain
from collections import defaultdict

class Graph(object):
    def __init__(self, edges, vertices=()):
        edges = list(list(x) for x in edges)
        self.edges = edges
        self.vertices = set(chain(*edges)).union(vertices)
        self.tails = defaultdict(list)
        for head, tail in self.edges:
            self.tails[head].append(tail)

    @classmethod
    def from_dict(cls, edge_dict):
        return cls((k, v) for k, vs in edge_dict.iteritems() for v in vs)

class _StrongCC(object):
    def strong_connect(self, head):
        lowlink, count, stack = self.lowlink, self.count, self.stack
        lowlink[head] = count[head] = self.counter = self.counter + 1
        stack.append(head)

        for tail in self.graph.tails[head]:
            if tail not in count:
                self.strong_connect(tail)
                lowlink[head] = min(lowlink[head], lowlink[tail])
            elif count[tail] < count[head]:
                if tail in self.stack:
                    lowlink[head] = min(lowlink[head], count[tail])

        if lowlink[head] == count[head]:
            component = []
            while stack and count[stack[-1]] >= count[head]:
                component.append(stack.pop())
            self.connected_components.append(component)

    def __call__(self, graph):
        self.graph = graph
        self.counter = 0
        self.count = dict()
        self.lowlink = dict()
        self.stack = []
        self.connected_components = []

        for v in self.graph.vertices:
            if v not in self.count:
                self.strong_connect(v)

        return self.connected_components

strongly_connected_components = _StrongCC()

if __name__ == '__main__':
    edges = [('A', 'B'), ('B', 'C'), ('C', 'D'), ('D', 'E'),
             ('E', 'A'), ('A', 'E'), ('C', 'A'), ('C', 'E'),
             ('D', 'F'), ('F', 'B'), ('E', 'F')]
    print strongly_connected_components(Graph(edges))
    edge_dict = {'a':['b', 'c', 'd'],
                 'b':['c', 'a'],
                 'c':['d', 'e'],
                 'd':['e'],
                 'e':['c']}
    print strongly_connected_components(Graph.from_dict(edge_dict))

score 0 · Accepted Answer

我修改了 senderle 对 Python 3.6+ 的回答，并添加了类型提示和注释，这样对我来说更有意义。

from itertools import chain
from collections import defaultdict
from typing import Iterable, DefaultDict, List, Dict, Generic, TypeVar, Tuple, Set

T = TypeVar('T')  # label for a vertex

class Graph(Generic[T]):
    def __init__(self, edges: Iterable[Tuple[T, T]], vertices: Iterable[T] = ()):
        edges = [list(x) for x in edges]
        self.edges = edges
        self.vertices: Set[T] = set(chain(*edges)).union(vertices)
        self.adj_list: DefaultDict[T, List[T]] = defaultdict(list)  # i.e., neighbors of a given node
        for head, tail in self.edges:
            self.adj_list[head].append(tail)

    @classmethod
    def from_dict(cls, edge_dict: Dict[T, Iterable[T]]):
        return cls((k, v) for k, vs in edge_dict.items() for v in vs)

def strongly_connected_components(graph: Graph[T]) -> List[List[T]]:
    idx = 0  # index to be assigned to the next node
    node_idxs: Dict[T, int] = {}  # index of a visited node
    lowlink: Dict[T, int] = {}  # low-link number is the lowest node number (index) reachable by the node that is in the same connected component – its own number, or the low-link number of a previous unvisited neighbor, or the node number of a visited neighbor in the stack
    stack: List[T] = []
    connected_components: List[List[T]] = []

    def visit(head: T) -> None:
        nonlocal idx
        lowlink[head] = node_idxs[head] = idx
        idx += 1
        stack.append(head)

        for neighbor in graph.adj_list[head]:
            if neighbor not in node_idxs:  # i.e., not visited
                visit(neighbor)
                lowlink[head] = min(lowlink[head], lowlink[neighbor])
            elif node_idxs[neighbor] < node_idxs[head]:
                if neighbor in stack:
                    lowlink[head] = min(lowlink[head], node_idxs[neighbor])

        if lowlink[head] == node_idxs[head]:
            component: List[T] = []
            while stack and node_idxs[stack[-1]] >= node_idxs[head]:
                component.append(stack.pop())
            connected_components.append(component)

    for v in graph.vertices:
        if v not in node_idxs:
            visit(v)
    return connected_components

if __name__ == '__main__':
    edges = [('A', 'B'), ('B', 'C'), ('C', 'D'), ('D', 'E'),
                ('E', 'A'), ('A', 'E'), ('C', 'A'), ('C', 'E'),
                ('D', 'F'), ('F', 'B'), ('E', 'F')]
    print(strongly_connected_components(Graph(edges)))  # [['F', 'D', 'C', 'B', 'A', 'E']]
    edge_dict = {'a':['b', 'c', 'd'],
                    'b':['c', 'a'],
                    'c':['d', 'e'],
                    'd':['e'],
                    'e':['c']}
    print(strongly_connected_components(Graph.from_dict(edge_dict)))  # [['e', 'd', 'c'], ['b', 'a']]

python - Tarjan 在 python 中的强连接组件算法不起作用

2 回答 2

Related

Reference