1

我是 Vega 和 Kibana 的新手,想创建一个散点图来显示主题标签(hashtag)及其平均极性(polarity),但在两个方面遇到了困难:一是如何对极性求平均值,二是如何从文档中访问主题标签的 text 字段。

我试图获得平均极性的代码(现在只是在时间尺度上显示):

      {$schema: https://vega.github.io/schema/vega-lite/v2.json
  # Vega-Lite spec (HJSON, as used by the Kibana Vega visualization):
  # plots the mean of `polarity` over time from raw hits of the `tw` index.
  data: {
    # URL object is a context-aware query to Elasticsearch
    url: {
      # The %-enclosed keys are handled by Kibana to modify the query
      # before it gets sent to Elasticsearch. Context is the search
      # filter as shown above the dashboard. Timefield uses the value
      # of the time picker from the upper right corner.
      %context%: true
      %timefield%: timestamp
      index: tw
      body: {
        # Fetch up to 10000 raw documents, limited to the listed source fields
        size: 10000
        _source: ["timestamp", "user_lang", "country", "polarity", "lang", "sentiment"]
      }
    }
    # We only need the content of hits.hits array
    format: {property: "hits.hits"}
  }
  # Parse timestamp into a javascript date value, stored as the top-level field "time"
  transform: [
    {calculate: "toDate(datum._source['timestamp'])", as: "time"}
  ]
  # Draw a line, with x being the time field and y the mean of polarity
  mark: line
  encoding: {
    x: {field: "time", type: "temporal"}
    # NOTE(review): the question reports that this aggregate on the nested
    # path fails with "Cannot read property 'polarity' of undefined";
    # without `aggregate` the chart renders.
    y: {aggregate: "mean", field: "_source.polarity", type: "quantitative"}
  }
}

这会报错:Cannot read property 'polarity' of undefined(无法读取 undefined 的属性 'polarity')。去掉 aggregate 之后就可以正常工作,但我想显示的是平均值,而不是所有数据点。

另外,我不知道如何访问嵌套的主题标签文本字段,我尝试过 _source.hashtags.text 但没有用:

示例文档:

{
        "_index": "tw",
        "_type": "tweet",
        "_id": "_HHWSGIBbYt8wc5TlB8B",
        "_score": 1,
        "_source": {
          "lang": "en",
          "favorited": false,
          "sentiment": "positive",
          "user_lang": "en",
          "user_screenname": "BrideWiltshire",
          "timestamp": "2018-03-21T13:54:04.928556",
          "user_follow_count": 147,
          "hashtags": [
            {
              "indices": [
                8,
                12
              ],
              "text": "WIN"
            }
          ],
          "user_stat_count": 3377,
          "user_fav_count": 11,
          "coordinates": null,
          "source": """<a href="https://panel.socialpilot.co/" rel="nofollow">SocialPilot.co</a>""",
          "subjectivity": 0.3333333333333333,
          "user_friends_count": 62,
          "polarity": 0.5333333333333333,
          "text": "Want to #WIN ‘His and Hers’ luggage labels from @DavidHampton, worth more than £100? Enter our competition now",
          "message": "Want to #WIN ‘His and Hers’ luggage labels from @DavidHampton, worth more than £100? Enter our competition now",
          "country": null,
          "user_name": "Wiltshire Bride",
          "favorite_count": 0
        }
      },

映射:

{
  "tw": {
    "mappings": {
      "tweet": {
        "properties": {
          "coordinates": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "country": {
            "type": "keyword"
          },
          "favorite_count": {
            "type": "long"
          },
          "favorited": {
            "type": "boolean"
          },
          "hashtags": {
            "properties": {
              "indices": {
                "type": "long"
              },
              "text": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              }
            }
          },
          "lang": {
            "type": "text"
          },
          "location": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "message": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "polarity": {
            "type": "float"
          },
          "sentiment": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "source": {
            "type": "text"
          },
          "subjectivity": {
            "type": "float"
          },
          "text": {
            "type": "text"
          },
          "time_zone": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "timestamp": {
            "type": "date"
          },
          "user": {
            "properties": {
              "favourites_count": {
                "type": "long"
              },
              "followers_count": {
                "type": "long"
              },
              "friends_count": {
                "type": "long"
              },
              "lang": {
                "type": "text"
              },
              "name": {
                "type": "text"
              },
              "screen_name": {
                "type": "text"
              },
              "statuses_count": {
                "type": "long"
              }
            }
          },
          "user_fav_count": {
            "type": "long"
          },
          "user_follow_count": {
            "type": "long"
          },
          "user_friends_count": {
            "type": "long"
          },
          "user_lang": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "user_name": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "user_screenname": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "user_stat_count": {
            "type": "long"
          }
        }
      }
    }
  }
}
4

1 回答 1

2

如果您的 hashtags 字段是 nested(嵌套)类型,并且 hashtags.text 是 keyword 字段(或者带有 hashtags.text.keyword 子字段),那么您可以使用以下配置来绘制散点图:

{
  # Vega-Lite spec (HJSON, for the Kibana Vega visualization).
  # Scatter plot: one point per hashtag, y = average polarity of the tweets
  # that carry that hashtag. The averaging is done by Elasticsearch, so no
  # raw hits are fetched (size: 0).
  $schema: https://vega.github.io/schema/vega-lite/v2.json
  title: hashtags vs avg_polarity
  data: {
    url: {
      # Index name used in the question
      index: tw
      body: {
        size: 0
        query: {
          match_all: {}
        }
        aggs: {
          # Step into the nested hashtag objects (requires "type": "nested")
          HashTags: {
            nested: {path: "hashtags"}
            aggs: {
              # One bucket per distinct hashtag text (must be a keyword field)
              HashTags_Text: {
                terms: {field: "hashtags.text"}
                aggs: {
                  # Step back out to the parent tweet to reach its polarity
                  Tweet_Polarity: {
                    reverse_nested: {}
                    aggs: {
                      Tweet_Polarity_avg: {
                        avg: {field: "polarity"}
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
    # Use the hashtag buckets as the data rows
    format: {property: "aggregations.HashTags.HashTags_Text.buckets"}
  }
  # Points, not a line: hashtags are unordered categories
  mark: {type: "point"}
  encoding: {
    x: {
      # Bucket key = hashtag text
      field: key
      type: nominal
      axis: {title: "HashTags"}
    }
    y: {
      # Path to the avg value inside each bucket
      field: Tweet_Polarity.Tweet_Polarity_avg.value
      type: quantitative
      axis: {title: "polarity"}
    }
  }
}

有趣的小插图 编辑在此处输入图像描述

在开始添加文档之前,您必须如下指定索引映射

# Create the index with an explicit mapping; "hashtags" is mapped as nested
# with a keyword "text" so it can be used in nested/terms aggregations.
PUT /tw
{
  "mappings": {
    "tweet": {
      "properties": {
        "favorite_count": {
          "type": "long"
        },
        "favorited": {
          "type": "boolean"
        },
        "hashtags": {
          "type": "nested",
          "properties": {
            "indices": {
              "type": "long"
            },
            "text": {
              "type": "keyword"
            }
          }
        },
        "lang": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "message": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "polarity": {
          "type": "float"
        },
        "sentiment": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "source": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "subjectivity": {
          "type": "float"
        },
        "text": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "timestamp": {
          "type": "date"
        },
        "user_fav_count": {
          "type": "long"
        },
        "user_follow_count": {
          "type": "long"
        },
        "user_friends_count": {
          "type": "long"
        },
        "user_lang": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "user_name": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "user_screenname": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "user_stat_count": {
          "type": "long"
        }
      }
    }
  }
}
于 2018-04-05T04:23:12.020 回答