python-3.x - NLP 中标记元素的可视化（折叠、展开、突出显示）

Question

嗨，所有 NLP 研究人员，

我有一个句子，其中一些标记使用自定义标记集（例如 SUB、PRD、OBJ 等）进行标记。这些标签的偏移信息以 XML 样式的文件格式 (XMI) 表示，如下所示。

<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:pos="http:///de/tudarmstadt/ukp/dkpro/core/api/lexmorph/type/pos.ecore" xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:cas="http:///uima/cas.ecore" xmlns:tweet="http:///de/tudarmstadt/ukp/dkpro/core/api/lexmorph/type/pos/tweet.ecore" xmlns:morph="http:///de/tudarmstadt/ukp/dkpro/core/api/lexmorph/type/morph.ecore" xmlns:dependency="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type/dependency.ecore" xmlns:type5="http:///de/tudarmstadt/ukp/dkpro/core/api/semantics/type.ecore" xmlns:type7="http:///de/tudarmstadt/ukp/dkpro/core/api/transform/type.ecore" xmlns:type6="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type.ecore" xmlns:type2="http:///de/tudarmstadt/ukp/dkpro/core/api/metadata/type.ecore" xmlns:type3="http:///de/tudarmstadt/ukp/dkpro/core/api/ner/type.ecore" xmlns:type4="http:///de/tudarmstadt/ukp/dkpro/core/api/segmentation/type.ecore" xmlns:type="http:///de/tudarmstadt/ukp/dkpro/core/api/coref/type.ecore" xmlns:constituent="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type/constituent.ecore" xmlns:chunk="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type/chunk.ecore" xmi:version="2.0">
<!-- Every annotation below points into the text held in the sofaString attribute of
     the cas:Sofa element via character offsets: "begin" is the index of the first
     covered character and "end" is one past the last (e.g. 0..1 covers "I"). -->
<cas:NULL xmi:id="0"/>
<type2:DocumentMetaData xmi:id="1" sofa="12" begin="0" end="28" language="x-unspecified" documentTitle="visualization-example2.txt" documentId="admin" documentUri="file:/C:/Users/Administrator/.webanno/repository/project/1/document/14/source/visualization-example2.txt" collectionId="file:/C:/Users/Administrator/.webanno/repository/project/1/document/14/source/" documentBaseUri="file:/C:/Users/Administrator/.webanno/repository/project/1/document/14/source/" isLastSegment="false"/>
<!-- One sentence spanning the whole text (0..28). -->
<type4:Sentence xmi:id="19" sofa="12" begin="0" end="28"/>
<!-- Tokens in text order: I(0..1) want(2..6) a(7..8) dog(9..12) with(13..17)
     long(18..22) hair(23..27) and the final period(27..28). -->
<type4:Token xmi:id="23" sofa="12" begin="0" end="1"/>
<type4:Token xmi:id="32" sofa="12" begin="2" end="6"/>
<type4:Token xmi:id="41" sofa="12" begin="7" end="8"/>
<type4:Token xmi:id="50" sofa="12" begin="9" end="12"/>
<type4:Token xmi:id="59" sofa="12" begin="13" end="17"/>
<type4:Token xmi:id="68" sofa="12" begin="18" end="22"/>
<type4:Token xmi:id="77" sofa="12" begin="23" end="27"/>
<type4:Token xmi:id="86" sofa="12" begin="27" end="28"/>
<!-- Custom tags stored as chunks. The hierarchy is not encoded explicitly; it follows
     from span containment: PRD(2..28) covers VERB(2..6) and OBJ(7..27), and OBJ covers
     HED(7..12) and PP(13..27). Note that PRD includes the final period while OBJ and PP
     stop before it. -->
<chunk:Chunk xmi:id="95" sofa="12" begin="0" end="1" chunkValue="SUB"/>
<chunk:Chunk xmi:id="100" sofa="12" begin="2" end="28" chunkValue="PRD"/>
<chunk:Chunk xmi:id="105" sofa="12" begin="2" end="6" chunkValue="VERB"/>
<chunk:Chunk xmi:id="110" sofa="12" begin="7" end="27" chunkValue="OBJ"/>
<chunk:Chunk xmi:id="115" sofa="12" begin="7" end="12" chunkValue="HED"/>
<chunk:Chunk xmi:id="120" sofa="12" begin="13" end="27" chunkValue="PP"/>
<!-- Tagset descriptions carry a zero-length span (0..0); they cover no text. -->
<type2:TagsetDescription xmi:id="125" sofa="12" begin="0" end="0" layer="de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency" name="UD Universal Dependencies"/>
<type2:TagsetDescription xmi:id="132" sofa="12" begin="0" end="0" layer="de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity" name="Named Entity tags"/>
<type2:TagsetDescription xmi:id="139" sofa="12" begin="0" end="0" layer="de.tudarmstadt.ukp.dkpro.core.api.transform.type.SofaChangeAnnotation" name="Operation"/>
<type2:TagsetDescription xmi:id="146" sofa="12" begin="0" end="0" layer="de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS" name="UD Universal POS tags"/>
<!-- The annotated text itself; its xmi:id (12) is the value every sofa="12" attribute refers to. -->
<cas:Sofa xmi:id="12" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="I want a dog with long hair."/>
<!-- "members" lists the xmi:id of each metadata, sentence, token, chunk and tagset element above. -->
<cas:View sofa="12" members="1 19 23 32 41 50 59 68 77 86 95 100 105 110 115 120 125 132 139 146"/></xmi:XMI>

我想要做的是像下面那样可视化这些标签。

"I want a dog with long hair"
|_SUB_| |___________PRD_________________|
        |_VERB_| |________OBJ___________|
                 |__HED__||_____PP______|

另外，我想在每个层次结构中折叠和展开这些标签，例如 PRD-> VERB OBJ；OBJ-> HED PP。
另外，当将光标悬停在特定标签上时，我想突出显示句子上每个标签覆盖的跨度。（所以，最好在 GUI 环境中显示）

基本上，它是一个二叉树结构，所以我在Python中寻找了一些相关的可视化包，例如Dash和Plotly，但它似乎不太适合我的特殊需求。

对于这项任务，我真的很感激任何建议，任何提示都会对我很有帮助。谢谢。

score 0 · Accepted Answer

您可以使用 BALKANGraph 的 JavaScript 图表库来实现所需的功能。

OrgChart JS 支持展开/折叠

我不确定您具体想突出显示什么；在下面的演示中，鼠标悬停时会突出显示当前节点及其父节点，您可以以此为起点实现自己的逻辑。

        // Define a "sentence" node template: a shallow copy of the existing "ana"
        // template whose size, two text fields and background rectangle are replaced
        // so that the full sentence fits into one wide (520x120) box.
        OrgChart.templates.sentence = Object.assign({}, OrgChart.templates.ana, {
            size: [520, 120],
            // Large, centred label near the bottom of the box.
            field_0: '<text class="field_0"  style="font-size: 24px;" fill="#ffffff" x="260" y="90" text-anchor="middle">{val}</text>',
            // Smaller, right-aligned label near the top of the box.
            field_1: '<text class="field_1"  style="font-size: 16px;" fill="#ffffff" x="500" y="30" text-anchor="end">{val}</text>',
            // Rounded blue background rectangle matching the template size.
            node: '<rect x="0" y="0" height="120" width="520" fill="#039BE5" stroke-width="1" stroke="#aeaeae" rx="5" ry="5"></rect>'
        });


            // Render the tag hierarchy of the example sentence into the element with
            // id "tree". Node 1 is the whole sentence; nodes 2-7 are the tagged spans.
            var chart = new OrgChart(document.getElementById("tree"), {
                // Map the template placeholders to node properties:
                // field_0 shows the tag name, field_1 shows the covered text.
                nodeBinding: {
                    field_0: "type",
                    field_1: "text"
                },
                // NOTE(review): newer OrgChart JS releases appear to expose this enum as
                // OrgChart.orientation - confirm against the library version actually loaded.
                orientation: BALKANGraph.orientation.top_left,
                // Nodes tagged "sentence" are drawn with the custom "sentence" template.
                tags: {
                    "sentence": {
                        template: "sentence"
                    }
                },
                // Each link goes from a tag to the tag (or sentence) whose span contains it:
                // SUB and PRD sit under the sentence, VERB and OBJ under PRD, HED and PP under OBJ.
                links: [
                    { from: 2, to: 1 },
                    { from: 3, to: 1 },
                    { from: 4, to: 3 },
                    { from: 5, to: 3 },
                    { from: 6, to: 5 },
                    { from: 7, to: 5 }
                ],
                // Every "text" value is an exact substring of the sentence in node 1
                // (single spaces throughout), so a span can be located by its text.
                nodes: [
                    { id: 1, text: "I want a dog with long hair", type: "SENTENCE", tags: ["sentence"] },
                    { id: 2, text: "I", type: "SUB" },
                    { id: 3, text: "want a dog with long hair", type: "PRD" },
                    { id: 4, text: "want", type: "VERB" },
                    { id: 5, text: "a dog with long hair", type: "OBJ" },
                    { id: 6, text: "a dog", type: "HED" },
                    { id: 7, text: "with long hair", type: "PP" }
                ]
            });

            /**
             * Adds or removes the "highlight" CSS class on a node element and on the
             * element of that node's parent, if it has one.
             *
             * @param {Element} nodeElement - element carrying a "node-id" attribute.
             * @param {boolean} enabled - true to add the class, false to remove it.
             */
            function setHighlight(nodeElement, enabled) {
                var method = enabled ? "add" : "remove";
                nodeElement.classList[method]("highlight");

                // Look the node up defensively: an unknown id must not make the handler throw.
                var node = chart.nodes[nodeElement.getAttribute("node-id")];
                var parent = node ? node.parent : null;
                if (parent == null) {
                    return;
                }

                // The parent may have no element to style, so check before touching it.
                var parentElement = chart.getNodeElement(parent.id);
                if (parentElement) {
                    parentElement.classList[method]("highlight");
                }
            }

            // NOTE(review): listeners are bound to the node elements that exist right now.
            // If the chart re-creates its elements (e.g. after expand/collapse), they
            // would have to be bound again - confirm against the library's behaviour.
            var nodeElements = chart.getNodeElements();
            for (var i = 0; i < nodeElements.length; i++) {
                // mouseenter/mouseleave are the matching non-bubbling pair: each fires once
                // per node instead of once per child shape the pointer crosses.
                nodeElements[i].addEventListener("mouseenter", function () {
                    setHighlight(this, true);
                });

                nodeElements[i].addEventListener("mouseleave", function () {
                    setHighlight(this, false);
                });
            }

        /* Let the page fill the viewport with no scrollbars so the chart can use all of it. */
        html, body {
            margin: 0;
            padding: 0;
            width: 100%;
            height: 100%;
            overflow: hidden;
            text-align: center;
            /* Generic fallback keeps a sans-serif face where Helvetica is not installed. */
            font-family: Helvetica, sans-serif;
        }

        /* Chart container: takes the full size of the page. */
        #tree {
            width: 100%;
            height: 100%;
        }

        /* Hover colour for highlighted nodes; !important overrides the fill
           attribute/style set on the node rectangle by the template. */
        .highlight rect {
            fill: #F57C00 !important;
        }

<!-- Loads the OrgChart JS library from the BALKANGraph site; the URL points at the
     "latest" build rather than a pinned version. -->
<script src="https://balkangraph.com/js/latest/OrgChart.js"></script>

    <!-- Container the chart is rendered into; the script looks it up via
         document.getElementById("tree"). -->
    <div id="tree"></div>

python-3.x - NLP 中标记元素的可视化（折叠、展开、突出显示）

1 回答 1

Related

Reference