1

我需要把一个大型数据集绘制成图。我用 Graphviz 命令行的所有可用工具(dot、neato、twopi 等)都试过了,但结果无法阅读,而且存在重叠。

我需要的是显示各个节点,并在边上带有标签,同时让重叠最少,这样图形既便于阅读,也可以打印在 A4 或 A3 纸上。

我在 neato 中使用了选项 overlap=false 和 splines=true,但结果仍然存在同样的重叠。

这是数据集:

// Undirected graph holding the asker's data set. Every edge statement below
// is a chain of nodes that starts and ends on the same node (a closed loop),
// and every chain passes through the node "ECNY".
graph {
    // Graph-wide settings: background colour plus the layout options being tried.
    // NOTE(review): ranksep is presumably only honoured by rank-based layouts; confirm it has any effect under neato.
    graph [ bgcolor=ivory2, overlap=false, splines=true, ranksep ="2.75"]
    // Style the hub node "ECNY" as a filled sky-blue circle; the braces keep these node defaults local to it.
    {node [width=1,height=1,shape=circle,style=filled,color=skyblue] "ECNY" }
    // Defaults for every edge declared after this line.
    // NOTE(review): Graphviz documents sep as a graph-level attribute; confirm whether it does anything when set on edges.
    edge [ len=2, sep=5] 
    // One statement per labelled loop: the label, pen width and colour in the
    // trailing brackets apply to every edge created by that chain.
    "DANA" -- "HMRN" -- "ECNY" -- "NORI" -- "MAJZ" -- "RSFH" -- "DANA" [label ="LD1-25-A01",  penwidth =3 , color="#156163"]
    "DANA" -- "HMRN" -- "ECNY" -- "NORI" -- "MAJZ" -- "RSFH" -- "DANA" [label ="LD1-25-A02",  penwidth =3 , color="#30a1f9"]
    "DANA" -- "MAJZ" -- "ECNY" -- "HMRN" -- "DANA" [label ="LD1-25-A03",  penwidth =3 , color="#ec591d"]
    "DANA" -- "MAJZ" -- "ECNY" -- "HMRN" -- "DANA" [label ="LD1-25-A04",  penwidth =3 , color="#263a5f"]
    "DANA" -- "ECNY" -- "DANA" [label ="LD3-25-A02",  penwidth =3 , color="#a3517c"]
    "HMRN" -- "ECNY" -- "MAJZ" -- "DANA" -- "HMRN" [label ="LD1-25-H01",  penwidth =3 , color="#800d83"]
    "HMRN" -- "ECNY" -- "MAJZ" -- "DANA" -- "TWRN" -- "HMRN" [label ="LD1-25-H02",  penwidth =3 , color="#89e15a"]
    "HMRN" -- "ECNY" -- "MAJZ" -- "DANA" -- "TWRN" -- "HMRN" [label ="LD3-25-H03",  penwidth =3 , color="#74ed0e"]
    "HMRN" -- "ECNY" -- "HMRN" [label ="JED-10-H08",  penwidth =3 , color="#e8e786"]
    "HMRN" -- "ECNY" -- "MAJZ" -- "DANA" -- "HMRN" [label ="LD1-25-H04",  penwidth =3 , color="#e1f559"]
    "ECNY" -- "HMRN" -- "ECNY" [label ="JED-10-A02",  penwidth =3 , color="#8f7964"]
    "ECNY" -- "HMRN" -- "ECNY" [label ="JED-10-A03",  penwidth =3 , color="#9058f0"]
    "ECNY" -- "HMRN" -- "SBCB" -- "ECNY" [label ="JED-10-A04",  penwidth =3 , color="#b537b7"]
    "ECNY" -- "HMRN" -- "SBCB" -- "ECNY" [label ="JED-10-A05",  penwidth =3 , color="#fc2c2a"]
    "ECNY" -- "HMRN" -- "ECNY" [label ="JED-10-A06",  penwidth =3 , color="#36309c"]
    "ECNY" -- "HMRN" -- "ECNY" [label ="JED-10-A07",  penwidth =3 , color="#25a571"]
    "ECNY" -- "OBHR" -- "ECNY" [label ="JED-10-A26",  penwidth =3 , color="#1a6077"]
    "ECNY" -- "2820" -- "ECNY" [label ="JED-25-A03",  penwidth =3 , color="#8bce8c"]
    "ECNY" -- "2138" -- "2129" -- "ECNY" [label ="JED-25-A04",  penwidth =3 , color="#9b9afa"]
    "ECNY" -- "2017" -- "2013" -- "ECNY" [label ="JED-25-A05",  penwidth =3 , color="#5ea9aa"]
    "ECNY" -- "2027" -- "2128" -- "ECNY" [label ="JED-25-A22",  penwidth =3 , color="#c0c4d4"]
    "ECNY" -- "2130" -- "2137" -- "ECNY" [label ="JED-25-A27",  penwidth =3 , color="#781ce0"]
    "ECNY" -- "DANA" -- "ECNY" [label ="LD3-25-A01",  penwidth =3 , color="#fd5c5a"]
    "ECNY" -- "HMRN" -- "ZJ01" -- "ECNY" [label ="JED-10-H03",  penwidth =3 , color="#32e13b"]
    "ECNY" -- "HMRN" -- "ECNY" [label ="JED-10-H04",  penwidth =3 , color="#487f94"]
    "ECNY" -- "2341" -- "2235" -- "2233" -- "ECNY" [label ="JED-10-H05",  penwidth =3 , color="#82ae2d"]
    "ECNY" -- "HMRN" -- "SBCB" -- "ECNY" [label ="JED-10-H06",  penwidth =3 , color="#f4651c"]
    "ECNY" -- "HMRN" -- "SBCB" -- "ECNY" [label ="JED-10-H07",  penwidth =3 , color="#23dd41"]
    "ECNY" -- "HMRN" -- "OBHR" -- "ECNY" [label ="JED-10-H37",  penwidth =3 , color="#521f43"]
    "ECNY" -- "PROJECT" -- "ECNY" [label ="JED-10-H49",  penwidth =3 , color="#0a4bf1"]
    "ECNY" -- "2234" -- "2246" -- "2245" -- "2320" -- "ECNY" [label ="JED-25-H01",  penwidth =3 , color="#6127e4"]
    "ECNY" -- "2842" -- "2030" -- "ECNY" [label ="JED-25-H03",  penwidth =3 , color="#ce1f98"]
    "ECNY" -- "2170" -- "2166" -- "ECNY" [label ="JED-25-H06",  penwidth =3 , color="#aeb0ce"]
    "ECNY" -- "2158" -- "2144" -- "ECNY" [label ="JED-25-H11",  penwidth =3 , color="#9ef618"]
    "ECNY" -- "5824" -- "2011" -- "ECNY" [label ="JED-25-H15",  penwidth =3 , color="#b2d524"]
    "ECNY" -- "2010" -- "2830" -- "2198" -- "ECNY" [label ="JED-25-H16",  penwidth =3 , color="#53e7ae"]
    "ECNY" -- "2179" -- "ECNY" [label ="JED-25-H17",  penwidth =3 , color="#149169"]
    "ECNY" -- "2211" -- "ECNY" [label ="JED-25-H19",  penwidth =3 , color="#15a51b"]
    "ECNY" -- "2316" -- "ECNY" [label ="JED-25-H20",  penwidth =3 , color="#e91d18"]
    "ECNY" -- "2203" -- "ECNY" [label ="JED-25-H22",  penwidth =3 , color="#38a23a"]
    "ECNY" -- "SBCB" -- "JFCC" -- "ECNY" [label ="JED-25-H33",  penwidth =3 , color="#a1abf4"]
    "ECNY" -- "HMRN" -- "ECNY" [label ="JED-25-H41",  penwidth =3 , color="#c14ff8"]
    "ECNY" -- "TAIF" -- "SNFN" -- "NORI" -- "ECNY" [label ="LD1-10-H04",  penwidth =3 , color="#75fb4f"]
    "ECNY" -- "MAJZ" -- "DANA" -- "TWRN" -- "HMRN" -- "ECNY" [label ="LD1-25-H03",  penwidth =3 , color="#52d1c8"]
    "ECNY" -- "DANA" -- "ECNY" [label ="LD3-25-H01",  penwidth =3 , color="#498a16"]
    "ECNY" -- "DANA" -- "ECNY" [label ="LD3-25-H02",  penwidth =3 , color="#70f831"]
    "ECNY" -- "2310" -- "5880" -- "5301" -- "2248" -- "ECNY" [label ="JED-10-H17",  penwidth =3 , color="#ebb4e1"]
    "SBCB" -- "ECNY" -- "HMRN" -- "JELS" -- "ROMN" -- "OBHR" -- "SAFA" -- "SBCB" [label ="JED-W40-A01",  penwidth =3 , color="#7ff59c"]
    "SBCB" -- "SAFA" -- "OBHR" -- "HMRN" -- "ECNY" -- "SBCB" [label ="JED-10-H35",  penwidth =3 , color="#e817b9"]
    "2171" -- "ECNY" -- "2171" [label ="JED-10-H19",  penwidth =3 , color="#bf1252"]
    "ABHA" -- "ECNY" -- "DANA" -- "ABHA" [label ="LD3-10-H01",  penwidth =3 , color="#8b60ae"]
    "MAJZ" -- "ECNY" -- "DANA" -- "2510" -- "MAJZ" [label ="LD1-10-H02",  penwidth =3 , color="#1e3c55"]
    "2209" -- "2206" -- "ECNY" -- "2209" [label ="JED-25-H02",  penwidth =3 , color="#1b6092"]
    }

这是使用这些选项时的输出之一(在neato中):

graph [ bgcolor=ivory2, overlap=false, splines=true, ranksep ="2.75"]

{node [width=1,height=1,shape=circle,style=filled,color=skyblue] "ECNY" }

edge [ len=2, sep=5]

在此处输入图像描述

把 overlap 设为 scale 时也是如此,图像仍然不可读!

overlap=scale, sep=\"+25,25\", splines=true, rankdir=\"TB\"

在此处输入图像描述

那么还有哪些其他属性可以增强此图?

4

4 回答 4

4

不可避免地,随着节点和边数的增加,我们最终将没有足够的空间来拥有一个紧凑但清晰的图。但是,我认为我们可以做一些事情来改善问题。在这种情况下,我们可以利用所有边序列形成返回“ECNY”的循环这一事实。

首先,我根据每条边与“ECNY”的距离来调整它的权重和默认长度:靠近“ECNY”的边权重较低(更有弹性)、默认长度较长,使图的中心部分更加分散;远离“ECNY”的边则相反,权重较高、默认长度较短。

其次,每个循环我只标注一次,标签放在循环中距离“ECNY”尽可能远的那条边上。

我使用 Python 脚本 adjweight.py 以编程方式完成了这些更改,如下所示:

import sys, re, math

# Node that every loop in the input is expected to pass through; edges are
# weighted by how far along the loop they are from this node.
CENTER_NODE = '"ECNY"'

# An edge statement is any line that starts with indentation followed by a
# quoted node name.
_EDGE_LINE = re.compile(r' +"')


def adjust_line(line, center=CENTER_NODE):
    """Rewrite one line of DOT text, returning the list of output lines.

    A multi-edge statement such as ``"A" -- "B" -- "A" [label=..., ...]`` is
    split into one statement per edge so that each edge can carry its own
    ``weight`` and ``len``.  Only the edge furthest from *center* keeps the
    label; every edge keeps the remaining attributes.

    Any other line is returned unchanged (minus trailing whitespace).  The
    same applies to an edge statement that has no `` [`` attribute list or
    does not contain *center* after the first node is dropped.

    Assumes the loop is closed (first node == last node), that ``label`` is
    the first attribute in the list, and that the label contains no comma.
    """
    line = line.rstrip()
    if not _EDGE_LINE.match(line):
        return [line]

    loop, bracket, attr = line.partition(" [")
    # We take all but the first node, to eliminate repeats (a closed loop
    # names its starting node again at the end).
    nodes = loop.split(" -- ")[1:]
    if not bracket or center not in nodes:
        return [line]

    # Rotate the loop so that it starts and ends on the center node.
    start = nodes.index(center)
    cycle = nodes[start:] + nodes[:start] + [center]
    n = len(cycle)
    half = n / 2.0  # explicit float so Python 2 does not floor it

    # Attribute list with the leading label removed.  Computed once from the
    # untouched original so that unlabelled edges never lose penwidth, color
    # or the closing bracket.
    unlabeled_attr = attr.partition(",")[2] or "]"
    # Index of the edge furthest from the center; it is the only one labelled.
    labeled_edge = n // 2 - 1

    out = []
    for i in range(n - 1):
        # We weight edges that are furthest from the center (i.e. those with
        # numbers closest to n/2) highest and those that are closest to the
        # center are weighted lowest.
        wt = half / (abs(half - (i + 1)) + 1)
        edgelength = 1 / wt
        edge_attr = attr if i == labeled_edge else unlabeled_attr
        # We raise the weights and edge lengths to different powers to adjust
        # the distribution of nodes across the "diameter" of the graph.
        out.append("    %s -- %s [weight = %f, len = %f, %s" % (
            cycle[i], cycle[i + 1], wt ** 3, edgelength ** 2, edge_attr))
    return out


def main():
    """Filter DOT text from stdin to stdout, rewriting each edge statement."""
    for line in sys.stdin:
        for out_line in adjust_line(line):
            print(out_line)


if __name__ == "__main__":
    main()

……可以通过 python adjweight.py <large.dot >large3.dot 运行,其中 large.dot 是包含问题中所示数据的 .dot 文件。

将边标签的字体大小改为 10(默认值为 14),然后用 neato 处理 large3.dot,我得到了下面的图: 在此处输入图像描述

这使得节点在页面上分布得更均匀,边标签也少了很多,所以在我看来这是一种改进。边标签仍然与一些节点以及其他边标签重叠(尤其是在图的右上角),因为 overlap 参数只影响节点和边线,而不影响边标签。

根据边与“ECNY”的距离进一步调整各条边的相对长度和权重,可能有助于继续改进图形。

于 2013-05-01T02:22:51.623 回答
1

“我需要可视化从某个节点到其他节点的所有路径,所以我关于可视化节点的问题并不在于简化。”好吧,您已经可视化了从某个节点到其他节点的所有路径,但正如您所说,**结果不可读并且包含重叠**。我认为您需要的是让这个可视化变得清晰。我的下一个建议是:先在小数据上手动尝试用三维图来可视化您的图表;如果在小数据上效果不错,再把它编写成代码,或者寻找能够以 3D 方式进行可视化的属性。

我只是试着思考如何帮助解决这个问题,所以所有这些只是建议和开放式问题,让您找到问题的答案。

于 2013-05-04T11:36:52.567 回答
0

我建议使用实现了常见力导向布局算法(http://en.wikipedia.org/wiki/Force-directed_graph_drawing)的软件/框架,其中最流行的算法是 Fruchterman-Reingold 和 Kamada-Kawai。我使用的是 R( http://www.r-project.org/ )中的 igraph 包。我知道这并没有完全回答你的问题,因为你说你使用的是 Graphviz,但使用力导向算法是你最好的选择。

于 2013-09-23T19:48:06.400 回答
0

为什么不先尝试减少图上的数据呢?即使你实现了想要的效果,这样的图也很难看懂;用图来表示数据的主要目的是简化,而这样做就失去了这个意义。所以我的建议是尝试减少图上的数据。

于 2013-05-04T11:01:27.060 回答