elastic-stack - elastic apm-server 监控内部队列

Question

我在 Kubernetes 中运行 Elastic apm-server，配置如下：

apm-server.yml

    # Source : https://github.com/elastic/apm-server/blob/master/apm-server.docker.yml
    ################################ APM Server ################################
    # Root of the APM Server settings; the ILM, RUM, agent-config and Kibana
    # sections that follow are all nested under this key.
    apm-server:
      # Host and port pair for the server (all interfaces, port 8200).
      host: "0.0.0.0:8200"
      # NOTE(review): `frontend` is disabled here while `rum.enabled: true` is set
      # later in this mapping — presumably `frontend` is a legacy name for RUM;
      # confirm which key the deployed version honours.
      frontend:
        enabled: false
    #================================= APM Server - ILM Index Lifecycle Management =================================
      # Index lifecycle management: maps each event type to a lifecycle policy
      # and defines that policy inline.
      ilm:
        # Written as a quoted string, not a YAML boolean.
        # NOTE(review): presumably this setting also accepts non-boolean modes —
        # confirm before unquoting.
        enabled: "true"
        setup:
          enabled: true
          # NOTE(review): presumably leaves already-installed policies untouched — confirm.
          overwrite: false
          # Set `require_policy` to `false` when policies are set up outside of APM Server but referenced here.
          # Default value is `true`.
          require_policy: true
          # Every active event type below uses the same policy and an empty index suffix.
          mapping:
            - event_type: "error"
              policy_name: "apm-rollover-7-days"
              index_suffix: ""
            - event_type: "span"
              policy_name: "apm-rollover-7-days"
              index_suffix: ""
            - event_type: "transaction"
              policy_name: "apm-rollover-7-days"
              index_suffix: ""
            - event_type: "metric"
              policy_name: "apm-rollover-7-days"
              index_suffix: ""
            # The onboarding mapping is commented out, so no policy is mapped for it here.
            # - event_type: "onboarding"
            #   policy_name: "apm-rollover-7-days"
            #   index_suffix: ""
          # Configured policies are added to pre-defined default policies.
          policies:
            # NOTE(review): the name says "7-days", but the delete phase below starts
            # at min_age 10d — confirm the intended retention.
            - name: "apm-rollover-7-days"
              policy:
                phases:
                  # Hot: rollover thresholds of 50gb size and 3d age; priority 100.
                  hot:
                    actions:
                      rollover:
                        max_size: "50gb"
                        max_age: "3d"
                      set_priority:
                        priority: 100
                  # Warm (min_age 5d): priority drops to 50 and the readonly action is applied.
                  warm:
                    min_age: "5d"
                    actions:
                      set_priority:
                        priority: 50
                      readonly: {}
                  # Cold (min_age 7d): priority drops to 0 and the freeze action is applied.
                  cold:
                    min_age: "7d"
                    actions:
                      set_priority:
                        priority: 0
                      freeze: {}
                  # Delete (min_age 10d): the delete action is applied.
                  delete:
                    min_age: "10d"
                    actions:
                      delete: {}

    #================================= APM Server - RUM Real User Monitoring =================================

      # Enable Real User Monitoring (RUM) Support. By default RUM is disabled.
      # RUM does not support token based authorization. Enabled RUM endpoints will not require any authorization
      # token configured for other endpoints.
      rum:
        enabled: true
        event_rate:
          # Defines the maximum amount of events allowed to be sent to the APM Server RUM
          # endpoint per IP per second. Defaults to 300.
          limit: 300
          # An LRU cache is used to keep a rate limit per IP for the most recently seen IPs.
          # This setting defines the number of unique IPs that can be tracked in the cache.
          # Sites with many concurrent clients should consider increasing this limit. Defaults to 1000.
          lru_size: 1000

        #-- General RUM settings

        # A list of permitted origins for real user monitoring.
        # User-agents will send an origin header that will be validated against this list.
        # An origin is made of a protocol scheme, host and port, without the url path.
        # Allowed origins in this setting can have * to match anything (eg.: http://*.example.com)
        # If an item in the list is a single '*', everything will be allowed.
        # The single '*' entry below therefore allows every origin.
        allow_origins: ['*']

        # A list of Access-Control-Allow-Headers to allow RUM requests, in addition to "Content-Type",
        # "Content-Encoding", and "Accept"
        allow_headers: []

        # Custom HTTP headers to add to RUM responses, e.g. for security policy compliance.
        #response_headers :
        #  X-My-Header: Contents of the header

        # Regexp to be matched against a stacktrace frame's `file_name` and `abs_path` attributes.
        # If the regexp matches, the stacktrace frame is considered to be a library frame.
        #library_pattern: "node_modules|bower_components|~"

        # Regexp to be matched against a stacktrace frame's `file_name`.
        # If the regexp matches, the stacktrace frame is not used for calculating error groups.
        # The default pattern excludes stacktrace frames that have a filename starting with '/webpack'
        #exclude_from_grouping: "^/webpack"

        # If a source map has previously been uploaded, source mapping is automatically applied
        # to all error and transaction documents sent to the RUM endpoint.
        source_mapping:

          # Sourcemapping is enabled by default.
          enabled: true

          # Source maps are always fetched from Elasticsearch, by default using the output.elasticsearch configuration.
          # A different instance must be configured when using any other output.
          # This setting only affects sourcemap reads - the output determines where sourcemaps are written.
          elasticsearch:
            # Array of hosts to connect to.
            # Scheme and port can be left out and will be set to the default (`http` and `9200`).
            # In case you specify an additional path, the scheme is required: `http://localhost:9200/path`.
            # IPv6 addresses should always be defined as: `https://[2001:db8::1]:9200`.
            hosts: ["elasticsearch:9200"]

            # Protocol - either `http` (default) or `https`.
            protocol: "http"

            # Authentication credentials - either API key or username/password.
            # Read from the environment so the real secret does not have to live in
            # this file; the value after the colon is the fallback used when the
            # variable is unset, which keeps the previous behaviour.
            #api_key: "id:api_key"
            username: "${ELASTICSEARCH_USERNAME:elastic}"
            password: "${ELASTICSEARCH_PASSWORD:changeme}"

          # The `cache.expiration` determines how long a source map should be cached before fetching it again from Elasticsearch.
          # Note that values configured without a time unit will be interpreted as seconds.
          cache:
            expiration: 5m

          # Source maps are stored in a separate index.
          # If the default index pattern for source maps at 'output.elasticsearch.indices'
          # is changed, a matching index pattern needs to be specified here.
          index_pattern: "apm-*-sourcemap*"

    #================================= APM Server - Agent Configuration =================================

      # When using APM agent configuration, information fetched from Kibana will be cached in memory for some time.
      # Specify cache key expiration via this setting. Default is 30 seconds.
      # Cache expiration for agent configuration fetched from Kibana (30s here).
      agent.config.cache.expiration: 30s
      # Kibana connection settings: plain HTTP to kibana:5601.
      # The agent-configuration comment above states that this information is
      # fetched from Kibana.
      kibana:
        enabled: true
        host: "kibana:5601"
        protocol: "http"

    #================================= General =================================

    # In-memory event queue settings.
    # NOTE(review): presumably this is the queue behind the `queue is full`
    # responses clients receive — confirm against the APM Server tuning docs.
    queue:
      mem:
        # NOTE(review): presumably the maximum number of events held in memory — confirm.
        events: 4096
        # Flush settings: a 2048-event minimum and a 1s timeout.
        # NOTE(review): presumably a batch is forwarded when either is reached — confirm.
        flush.min_events: 2048
        flush.timeout: 1s
    # NOTE(review): presumably caps how many CPUs the process may use at once — confirm.
    max_procs: 4

    #================================= Template =================================

    # Index template setup; the template name and pattern both embed the
    # observer version via the `%{[observer.version]}` placeholder.
    setup.template.enabled: true
    setup.template.name: "apm-%{[observer.version]}"
    setup.template.pattern: "apm-%{[observer.version]}-*"
    # Path to fields.yml file to generate the template.
    #setup.template.fields: "${path.config}/fields.yml"
    # Overwrite existing template.
    setup.template.overwrite: false
    # Elasticsearch template settings.
    setup.template.settings:
      # https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html
      index:
        number_of_shards: 1
        codec: best_compression
        number_of_routing_shards: 30
        mapping.total_fields.limit: 2000
        # NOTE(review): the rollover alias contains a date pattern while ILM is
        # enabled under `apm-server.ilm` — confirm that a date-stamped alias is intended.
        lifecycle.rollover_alias: apm-%{[observer.version]}-%{+yyyy.MM.dd}

    #================================ Outputs ====================================

    # Elasticsearch output: where events are sent and how they are routed to indices.
    # No credentials are configured on this output.
    output.elasticsearch:
      hosts: ["elasticsearch:9200"]
      enabled: true
      # Gzip compression level.
      compression_level: 9
      protocol: "http"
      # NOTE(review): a single worker; presumably this bounds how fast the queue
      # can drain to Elasticsearch — confirm when investigating `queue is full`.
      worker: 1
      # Index name with observer version and date.
      # NOTE(review): presumably used when none of the `indices` rules match — confirm.
      index: "apm-%{[observer.version]}-%{+yyyy.MM.dd}"
      # Conditional routing on `processor.event`. Every rule except the sourcemap
      # one appends a date suffix to the index name.
      indices:
        - index: "apm-%{[observer.version]}-sourcemap"
          when.contains:
            processor.event: "sourcemap"
        - index: "apm-%{[observer.version]}-error-%{+yyyy.MM.dd}"
          when.contains:
            processor.event: "error"
        - index: "apm-%{[observer.version]}-transaction-%{+yyyy.MM.dd}"
          when.contains:
            processor.event: "transaction"
        - index: "apm-%{[observer.version]}-span-%{+yyyy.MM.dd}"
          when.contains:
            processor.event: "span"
        - index: "apm-%{[observer.version]}-metric-%{+yyyy.MM.dd}"
          when.contains:
            processor.event: "metric"
        - index: "apm-%{[observer.version]}-onboarding-%{+yyyy.MM.dd}"
          when.contains:
            processor.event: "onboarding"

      # Number of attempts for an index operation.
      max_retries: 3
      # Maximum number of events in a single bulk API index request.
      bulk_max_size: 60
      # Reconnect backoff after a network error: initial and maximum wait.
      backoff.init: 1s
      backoff.max: 60s

    #================================= Logging =================================

    # Log level is info.
    logging.level: info
    # NOTE(review): "*" presumably matches every selector — confirm whether it has
    # any effect at info level.
    logging.selectors: ["*"]
    logging.to_syslog: false
    # Metrics logging is switched off.
    # NOTE(review): the 10s period below presumably has no effect while it is
    # disabled — confirm.
    logging.metrics.enabled: false
    logging.metrics.period: 10s

    #=============================== HTTP Endpoint ===============================

    # Stats can be accessed through http://localhost:5066/stats. For pretty JSON output append ?pretty to the URL.
    # NOTE(review): the endpoint is bound to localhost, so presumably only a
    # collector in the same network namespace (e.g. the same pod) can reach it —
    # confirm where Metricbeat runs.
    http.enabled: true
    http.host: localhost
    http.port: 5066

    #============================= X-pack Monitoring =============================

    # APM server can export internal metrics to a central Elasticsearch monitoring
    # cluster.  This requires x-pack monitoring to be enabled in Elasticsearch.  The
    # reporting is disabled by default.

    # Set to true to enable the monitoring reporter.
    monitoring.enabled: true

    # Most settings from the Elasticsearch output are accepted here as well.
    # Note that these settings should be configured to point to your Elasticsearch *monitoring* cluster.
    # Any setting that is not set is automatically inherited from the Elasticsearch
    # output configuration. This means that if you have the Elasticsearch output configured,
    # you can simply uncomment the following line.
    # Here `hosts` and the credentials stay commented out, so they are inherited
    # from the output configuration.
    monitoring.elasticsearch:

      # Protocol - either `http` (default) or `https`.
      protocol: "http"

      # Authentication credentials - either API key or username/password.
      # api_key: "id:api_key"
      # username: "elastic"
      # password: "changeme"

      # Array of hosts to connect to.
      # Scheme and port can be left out and will be set to the default (`http` and `9200`).
      # In case you specify an additional path, the scheme is required: `http://localhost:9200/path`.
      # IPv6 addresses should always be defined as: `https://[2001:db8::1]:9200`.
      # hosts: ["elasticsearch:9200"]

      # Set gzip compression level.
      # compression_level: 0

      # Dictionary of HTTP parameters to pass within the URL with index operations.
      #parameters:
        #param1: value1
        #param2: value2

      # Custom HTTP headers to add to each request.
      #headers:
      #  X-My-Header: Contents of the header

      # The number of times a particular Elasticsearch index operation is attempted. If
      # the indexing operation doesn't succeed after this many retries, the events are
      # dropped. The default is 3.
      # max_retries: 3

      # The maximum number of events to bulk in a single Elasticsearch bulk API index request.
      # The default is 50.
      bulk_max_size: 50

      # The number of seconds to wait before trying to reconnect to Elasticsearch
      # after a network error. After waiting backoff.init seconds, apm-server
      # tries to reconnect. If the attempt fails, the backoff timer is increased
      # exponentially up to backoff.max. After a successful connection, the backoff
      # timer is reset. The default is 1s.
      # backoff.init: 1s

      # The maximum number of seconds to wait before attempting to connect to
      # Elasticsearch after a network error. The default is 60s.
      backoff.max: 60s

      # Configure HTTP request timeout before failing a request to Elasticsearch.
      # NOTE(review): no unit is given; presumably seconds — confirm.
      timeout: 90

      # NOTE(review): presumably how often metrics and state are reported to the
      # monitoring cluster (10s and 1m respectively) — confirm.
      metrics.period: 10s
      state.period: 1m

我有 20 个 apm-server 实例正在运行，但客户端仍然收到 `queue is full` 错误。

如何监控 apm-server 内部队列？

我启用了 metricbeat 监控，但它不显示内部队列指标。

elastic-stack - elastic apm-server 监控内部队列

0 回答 0

Related

Reference