
I am setting up Snowplow on my local machine (Ubuntu). I have installed and configured the Scala Stream Collector; here is my configuration file (collector.conf):

# Copyright (c) 2013-2016 Snowplow Analytics Ltd. All rights reserved.
#
# This program is licensed to you under the Apache License Version 2.0, and
# you may not use this file except in compliance with the Apache License
# Version 2.0.  You may obtain a copy of the Apache License Version 2.0 at
# http://www.apache.org/licenses/LICENSE-2.0.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the Apache License Version 2.0 is distributed on an "AS
# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.  See the Apache License Version 2.0 for the specific language
# governing permissions and limitations there under.

# This file (application.conf.example) contains a template with
# configuration options for the Scala Stream Collector.
#
# To use, copy this to 'application.conf' and modify the configuration options.

# 'collector' contains configuration options for the main Scala collector.
collector {
  # The collector runs as a web service specified on the following
  # interface and port.
  interface = "172.16.0.157"
  port = "8080"

  # Production mode disables additional services helpful for configuring and
  # initializing the collector, such as a path '/dump' to view all
  # records stored in the current stream.
  production = true

  # Configure the P3P policy header.
  p3p {
    policyref = "/w3c/p3p.xml"
    CP = "NOI DSP COR NID PSA OUR IND COM NAV STA"
  }

  # The collector returns a cookie to clients for user identification
  # with the following domain and expiration.
  cookie {
    enabled = true
    expiration = "365 days" # e.g. "365 days"
    # Network cookie name
    name = sp
    # The domain is optional and will make the cookie accessible to other
    # applications on the domain. Comment out this line to tie cookies to
    # the collector's full domain
    domain = "com.unilog.analytics"
  }

  # The collector has a configurable sink for storing data in
  # different formats for the enrichment process.
  sink {
    # Sinks currently supported are:
    # 'kinesis' for writing Thrift-serialized records to a Kinesis stream
    # 'kafka' for writing Thrift-serialized records to kafka
    # 'stdout' for writing Base64-encoded Thrift-serialized records to stdout
    #    Recommended settings for 'stdout' so each line printed to stdout
    #    is a serialized record are:
    #      1. Setting 'akka.loglevel = OFF' and 'akka.loggers = []'
    #         to disable all logging.
    #      2. Using 'sbt assembly' and 'java -jar ...' to disable
    #         sbt logging.
    enabled = "stdout"

    kinesis {
      thread-pool-size: 10 # Thread pool size for Kinesis API requests

      # The following are used to authenticate for the Amazon Kinesis sink.
      #
      # If both are set to 'default', the default provider chain is used
      # (see http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html)
      #
      # If both are set to 'iam', use AWS IAM Roles to provision credentials.
      #
      # If both are set to 'env', use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
      aws {
        access-key: "collector.aws.access-key"
        secret-key: "collector.aws.secret-key"
      }

      # Data will be stored in the following stream.
      stream {
        region: "{{collector.stream.region}}"
        good: "{{collector.stream.good}}"
        bad: "{{collector.stream.bad}}"
      }

      # Minimum and maximum backoff periods
      backoffPolicy: {
        minBackoff: 3000
        maxBackoff: 600000
      }
    }

    kafka {
      brokers: "{{collectorKafkaBrokers}}"

      # Data will be stored in the following topics
      topic {
        good: "{{collectorKafkaTopicGoodName}}"
        bad: "{{collectorKafkaTopicBadName}}"
      }
    }

    # Incoming events are stored in a buffer before being sent to Kinesis/Kafka.
    # The buffer is emptied whenever:
    # - the number of stored records reaches record-limit or
    # - the combined size of the stored records reaches byte-limit or
    # - the time in milliseconds since the buffer was last emptied reaches time-limit
    buffer {
        byte-limit: 4000000
        record-limit: 500
        time-limit: 5000
    }
  }
}

# Akka has a variety of possible configuration options defined at
# http://doc.akka.io/docs/akka/2.2.3/general/configuration.html.
akka {
#    loglevel = DEBUG # 'OFF' for no logging, 'DEBUG' for all logging.
   loglevel = OFF
#    loggers = ["akka.event.slf4j.Slf4jLogger"]
   loggers = []
}

# spray-can is the server the Stream collector uses and has configurable
# options defined at
# https://github.com/spray/spray/blob/master/spray-can/src/main/resources/reference.conf
spray.can.server {
  # To obtain the hostname in the collector, the 'remote-address' header
  # should be set. By default, this is disabled, and enabling it
  # adds the 'Remote-Address' header to every request automatically.
  remote-address-header = on

  uri-parsing-mode = relaxed
  raw-request-uri-header = on

  # Define the maximum request length (the default is 2048)
  parsing {
    max-uri-length = 32768
  }
} 
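
To rule out basic connectivity problems, the collector can be checked directly before the tracker is involved. This is only a rough check and assumes the collector exposes the usual GET /i pixel endpoint on the interface and port configured above:

# assumes the collector is already running with the config above; /i is the standard pixel endpoint
curl -v http://172.16.0.157:8080/i

A 200 response (a 1x1 GIF body) should confirm that the collector is reachable on the configured interface and port.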

I added the following JavaScript tracker script to my web page:

 <script type="text/javascript">
        ;(function(p,l,o,w,i,n,g){if(!p[i]){p.GlobalSnowplowNamespace=p.GlobalSnowplowNamespace||[];
        p.GlobalSnowplowNamespace.push(i);p[i]=function(){(p[i].q=p[i].q||[]).push(arguments)
        };p[i].q=p[i].q||[];n=l.createElement(o);g=l.getElementsByTagName(o)[0];n.async=1;
        n.src=w;g.parentNode.insertBefore(n,g)}}(window,document,"script","//d1fc8wv8zag5ca.cloudfront.net/2.8.0/sp.js","snowplow"));

        window.snowplow('newTracker', 'cf', '172.16.0.157:8080', { // Initialise a tracker
          appId: '1',
          cookieDomain: 'com.unilog.analytics'
        });

        window.snowplow('trackPageView');
        </script>

The tracker's hits reach the collector and are written out in an encoded format (Base64-encoded Thrift, as expected for the stdout sink):

CwBkAAAACzE3Mi4xNi4yLjI2CgDIAAABXWUMPDwLANIAAAAFVVRGLTgLANwAAAAQc3NjLTAuOS4wLXN0ZG91dAsBLAAAAHNNb3ppbGxhLzUuMCAoV2luZG93cyBOVCAxMC4wOyBXaW42NDsgeDY0KSBBcHBsZVdlYktpdC81MzcuMzYgKEtIVE1MLCBsaWtlIEdlY2tvKSBDaHJvbWUvNTkuMC4zMDcxLjExNSBTYWZhcmkvNTM3LjM2CwE2AAAAOGh0dHA6Ly9sb2NhbGhvc3Q6ODA4MC9Tbm93cGxvdy9TYW1wbGVFeGFtcGxlVHJhY2tlci5odG1sCwFAAAAAAi9pCwFKAAAB43N0bT0xNTAwNjM4ODU4MjYzJmU9cHYmdXJsPWh0dHAlM0ElMkYlMkZsb2NhbGhvc3QlM0E4MDgwJTJGU25vd3Bsb3clMkZTYW1wbGVFeGFtcGxlVHJhY2tlci5odG1sJnBhZ2U9Rml4ZWQlMjBXaWR0aCUyMDIlMjBCbHVlJnR2PWpzLTIuOC4wJnRuYT1jZiZhaWQ9MSZwPXdlYiZ0ej1Bc2lhJTJGS29sa2F0YSZsYW5nPWVuLVVTJmNzPVVURi04JmZfcGRmPTEmZl9xdD0wJmZfcmVhbHA9MCZmX3dtYT0wJmZfZGlyPTAmZl9mbGE9MCZmX2phdmE9MCZmX2dlYXJzPTAmZl9hZz0wJnJlcz0xMzY2eDc2OCZjZD0yNCZjb29raWU9MSZlaWQ9NjQ1MGVmOTMtYmFiYy00YzYxLTgwMmQtYTkwMmQxNDFjODhiJmR0bT0xNTAwNjM4ODU4MjYxJnZwPTE1MTd4NzM1JmRzPTE0OTl4NzgzJnZpZD0xJnNpZD0zZTM3ZmYyNy1mZDI1LTRkYTgtYmY1ZC02MWRmNTkxYTUyNGYmZHVpZD01MjBiOTM4OS0zNWExLTQzNGUtOWVkYy1hOGVkMzEwZjQwNTImZnA9MzI4MDUzODQxMQ8BXgsAAAAIAAAAF0hvc3Q6IDE3Mi4xNi4wLjE1Nzo4MDgwAAAAFkNvbm5lY3Rpb246IGtlZXAtYWxpdmUAAAB/MTdjZjZhZjQ1NmJlYmY7IF9fcm9pdD0wOyBfX3JvaU5vbkRpcmVjdD10cnVlOyBsaXZlYWdlbnRfb3JlZj1odHRwOi8vMTcyLjE2LjAuMTU3OjgwODAveHJmMTAwLzsgbGl2ZWFnZW50X3B0aWQ9MzFjZTRkMWEtNWQzYi00Mjg0LWI1ZDEtNTYyMWViYjBlZmFjOyBsaXZlYWdlbnRfdmM9MzsgX19hdHV2Yz0zJTdDMjAlMkMyJTdDMjE7IF9faHN0Yz03NTM3MTAuOGM1NTU1NzY0ODNmMzc1MDVmNjBjYjYwODZjYjc2OWEuMTQ5NTc3NjY0Nzk0NC4xNDk1Nzc2NjQ3OTQ0LjE0OTU3NzY2NDc5NDQuMTsgaHVic3BvdHV0az04YzU1NTU3NjQ4M2YzNzUwNWY2MGNiNjA4NmNiNzY5YTsgc2YtdHJja25nY2tpZT04YWMxMDliMi05YTMzLTQxMDgtODk3ZC0xNDRmYmVkNjEyMjI7IF9fZGlzdGlsbGVyeT0zODI1YTA4X2UyZjZiNzQ0LTBkNTktNDljNy1hN2FkLTA4NDk5YWIxNzhiMy1hY2Q4OThjZGItNzE0NDk3NGRkMmRhLTYyZmY7IG11eERhdGE9bXV4X3ZpZXdlcl9pZD05NzQ5MTFjMS01ZTJiLTQ1OTYtOTE0Ny05MWU2MTVjZjQ1YjAmbXNuPTAuNDY3MzczMzUyNTYzODEyNCZzaWQ9NjlhY2VkYzAtNjQ5NC00NmM4LWFmN2UtMzdhMGVkOGRjMzRmJnNzdD0xNDk1Nzc5NTA2MDg3JnNleD0xNDk1NzgxMDA2NzczOyBfX3V0bWE9MTU0NTI5NDAwLjE2MzUxNzE5ODEuMTQ5NTE5MjEwNy4xNDk1NjIwNjQ2LjE0OTYxMjY2NzMuNTsgX191dG16PTE1NDUyOTQwMC4xNDk1NDYxMTgzLjEuMS51dG1jc3I9KGRpcmVjdCl8dXRtY2NuPShkaXJlY3QpfHV0bWNtZD0obm9uZSk7IG9nX3Nlc3Npb25faWQ9NTFkMzIwYmM4YWQ3ODMwNjI5ZDMxN2I4ZTllZGQ1NmIuMTQ0MTMzLjE0OTY2NjA0Mzc7IEFNQ1ZfRkM4MDQwM0Q1M0MzRUQ2QzBBNDkwRDRDJTQwQWRvYmVPcmc9MTA5OTQzODM0OCU3Q01DSURUUyU3QzE3MzMyJTdDTUNNSUQlN0MyMDE3NDQ2NTUzMTQyNzMzMTMzNDAwMDcwNDAwMTY2NDgwNjMyNCU3Q01DQUFNTEgtMTQ5ODAyNjQ2NSU3QzMlN0NNQ0FBTUItMTQ5ODA1NDE3MCU3Q05SWDM4V08wbjVCSDhUaC1ucUFHX0ElN0NNQ09QVE9VVC0xNDk3NDU2NTcwcyU3Q05PTkUlN0NNQ0FJRCU3Q05PTkUlN0NNQ0NJREglN0MxOTU1NjYyOTQwJTdDTUNTWU5DU09QJTdDNDExLTE3MzM5JTdDdlZlcnNpb24lN0MyLjEuMDsgX3Nkc2F0X1VzZXIgSWRlbnRpZmljYXRpb246IFRyYWNrIENvZGU9aHR0cHM6Ly93d3cuZ3JhaW5nZXIuY29tL3NlYXJjaD9zZWFyY2hCYXI9dHJ1ZSZzZWFyY2hRdWVyeT1hYmNkOyBSRVNfVFJBQ0tJTkdJRD0zOTQ3MjM1NzkwNjY2NTg7IEFNQ1ZfOTk1OTFDOEI1MzMwNkI1=
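
(This blob is not encrypted; per the stdout sink comments above it should be a Base64-encoded, Thrift-serialized record. As a rough way to inspect it, assuming the blob is saved to a file, the printable parts can be pulled out like this; the blob may be truncated, so base64 may warn about invalid padding:)

# payload.b64 = the blob above saved to a file (example name only)
base64 -d payload.b64 | strings | head -n 30

The readable output should include the tracker querystring (url, page, tv=js-2.8.0, aid=1, ...) and the HTTP headers of the request.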

I have also installed and configured Stream Enrich. Below is my configuration file (enrich.conf):

# Copyright (c) 2013-2016 Snowplow Analytics Ltd. All rights reserved.
#
# This program is licensed to you under the Apache License Version 2.0, and
# you may not use this file except in compliance with the Apache License
# Version 2.0.  You may obtain a copy of the Apache License Version 2.0 at
# http://www.apache.org/licenses/LICENSE-2.0.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the Apache License Version 2.0 is distributed on an "AS
# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.  See the Apache License Version 2.0 for the specific language
# governing permissions and limitations there under.

# This file (application.conf.example) contains a template with
# configuration options for Stream Enrich.

enrich {
  # Sources currently supported are:
  # 'kinesis' for reading Thrift-serialized records from a Kinesis stream
  # 'kafka' for reading Thrift-serialized records from a Kafka topic
  # 'stdin' for reading Base64-encoded Thrift-serialized records from stdin
  source = "stdin"

  # Sinks currently supported are:
  # 'kinesis' for writing enriched events to one Kinesis stream and invalid events to another.
  # 'kafka' for writing enriched events to one Kafka topic and invalid events to another.
  # 'stdouterr' for writing enriched events to stdout and invalid events to stderr.
  #    Using "sbt assembly" and "java -jar" is recommended to disable sbt
  #    logging.
  sink = "stdouterr"

  # AWS credentials
  #
  # If both are set to 'default', use the default AWS credentials provider chain.
  #
  # If both are set to 'iam', use AWS IAM Roles to provision credentials.
  #
  # If both are set to 'env', use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
  aws {
    access-key: "iam"
    secret-key: "iam"
  }

  # Kafka configuration
  kafka {
    brokers: "{{enrichKafkaBrokers}}"
  }

  streams {
    in: {
      raw: "{{enrichStreamsInRaw}}"

      # Maximum number of records to get from Kinesis per call to GetRecords
      maxRecords: 10000

      # After enrichment, are accumulated in a buffer before being sent to Kinesis.
      # The buffer is emptied whenever:
      # - the number of stored records reaches record-limit or
      # - the combined size of the stored records reaches byte-limit or
      # - the time in milliseconds since it was last emptied exceeds time-limit when
      #   a new event enters the buffer
      buffer: {
        byte-limit: 4000000
        record-limit: 500 # Not supported by Kafka; will be ignored
        time-limit: 5000
      }
    }

    out: {
      enriched: "{{enrichStreamsOutEnriched}}"
      bad: "{{enrichStreamsOutBad}}"

      # Minimum and maximum backoff periods
      # - Units: Milliseconds
      backoffPolicy: {
        minBackoff: 6000
        maxBackoff: 300
      }
    }

    # "app-name" is used for a DynamoDB table to maintain stream state.
    # "app-name" is used as the Kafka consumer group ID.
    # You can set it automatically using: "SnowplowKinesisEnrich-$\\{enrich.streams.in.raw\\}"
    app-name: "{{enrichStreamsAppName}}"

    # LATEST: most recent data.
    # TRIM_HORIZON: oldest available data.
    # Note: This only effects the first run of this application
    # on a stream.
    initial-position = "TRIM_HORIZON"

    region: "{{enrichStreamsRegion}}"
  }

  # Optional section for tracking endpoints
  #monitoring {
    #snowplow {
      #collector-uri: "172.16.0.157"
      #collector-port: 8080
      #app-id: "1"
      #}
  #}
}

I am running Stream Enrich with the following command (the collector's output is piped into it):

./snowplow-stream-collector-0.9.0 --config collector.conf | /home/hadoop/snowplow/3-enrich/stream-enrich/target/scala-2.10/snowplow-stream-enrich-0.10.0 --config /home/hadoop/snowplow/3-enrich/stream-enrich/target/scala-2.10/enrich.conf --resolver file:/home/hadoop/snowplow/3-enrich/config/iglu_resolver.json --enrichments file:/home/hadoop/snowplow/3-enrich/config/enrichments/ >> stream_enrich_log_file.txt 2>&1
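
(For reference, the same pipeline can also be split into two steps to see which side produces the failure. The binaries, flags and paths are the same as above; raw_events.txt is just an example intermediate file name:)

# Step 1: run only the collector and capture its stdout
./snowplow-stream-collector-0.9.0 --config collector.conf > raw_events.txt

# Step 2: feed the captured records into Stream Enrich
cat raw_events.txt | /home/hadoop/snowplow/3-enrich/stream-enrich/target/scala-2.10/snowplow-stream-enrich-0.10.0 --config /home/hadoop/snowplow/3-enrich/stream-enrich/target/scala-2.10/enrich.conf --resolver file:/home/hadoop/snowplow/3-enrich/config/iglu_resolver.json --enrichments file:/home/hadoop/snowplow/3-enrich/config/enrichments/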

But I am getting the following errors:

{"line":"151200121ForkJoinPool+2+worker+5ERRORcsscscalastreamScalaCollector+Failurebindingtopors=","errors":[{"level":"error  ","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 2 bytes, but only got 1 bytes  . (This is often indicative of an internal error on the server side. Please check your server logs.)"}],"failure_tstamp":"20  17-07-21T09:42:06.170Z"}
{"line":"javalangRuntimeExceptionCommandFailedBindActorakka//scala+stream+collector/user/handler+540057214/1721601578080100L  istNonQ==","errors":[{"level":"error","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried t  o read 2 bytes, but only got 1 bytes. (This is often indicative of an internal error on the server side. Please check your s  erver logs.)"}],"failure_tstamp":"2017-07-21T09:42:06.180Z"}
{"line":"atcomsnowplowanalyticssnowplowcollectorsscalastreamScalaCollectoranonfun3applyScalaCollectorAppscala118snowplow+str  eam+collector+090090=","errors":[{"level":"error","message":"Error deserializing raw event: Cannot read. Remote side has clo  sed. Tried to read 1 bytes, but only got 0 bytes. (This is often indicative of an internal error on the server side. Please   check your server logs.)"}],"failure_tstamp":"2017-07-21T09:42:06.181Z"}
{"line":"atcomsnowplowanalyticssnowplowcollectorsscalastreamScalaCollectoranonfun3applyScalaCollectorAppscala116snowplow+str  eam+collector+090090=","errors":[{"level":"error","message":"Error deserializing raw event: Cannot read. Remote side has clo  sed. Tried to read 1 bytes, but only got 0 bytes. (This is often indicative of an internal error on the server side. Please   check your server logs.)"}],"failure_tstamp":"2017-07-21T09:42:06.181Z"}
{"line":"atscalaconcurrentFutureanonfunflatMap1applyFuturescala251snowplow+stream+collector+09009","errors":[{"level":"error  ","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 1 bytes, but only got 0 bytes  . (This is often indicative of an internal error on the server side. Please check your server logs.)"}],"failure_tstamp":"20  17-07-21T09:42:06.182Z"}
{"line":"atscalaconcurrentFutureanonfunflatMap1applyFuturescala249snowplow+stream+collector+09009","errors":[{"level":"error  ","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 1 bytes, but only got 0 bytes  . (This is often indicative of an internal error on the server side. Please check your server logs.)"}],"failure_tstamp":"20  17-07-21T09:42:06.182Z"}
{"line":"atscalaconcurrentimplCallbackRunnablerunPromisescala32snowplow+stream+collector+09009w==","errors":[{"level":"error  ","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 2 bytes, but only got 0 bytes  . (This is often indicative of an internal error on the server side. Please check your server logs.)"}],"failure_tstamp":"20  17-07-21T09:42:06.182Z"}
{"line":"atscalaconcurrentimplExecutionContextImplanon3execExecutionContextImplscala107snowplow+stream+collector+09009w==","  errors":[{"level":"error","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 2 byt  es, but only got 0 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)  "}],"failure_tstamp":"2017-07-21T09:42:06.183Z"}
{"line":"atscalaconcurrentforkjoinForkJoinTaskdoExecForkJoinTaskjava260snowplow+stream+collector+09009w==","errors":[{"level  ":"error","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 2 bytes, but only got   0 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)"}],"failure_tst  amp":"2017-07-21T09:42:06.183Z"}
{"line":"atscalaconcurrentforkjoinForkJoinPoolWorkQueuerunTaskForkJoinPooljava1339snowplow+stream+collector+09009","errors":  [{"level":"error","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 1 bytes, but   only got 0 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)"}],"fai  lure_tstamp":"2017-07-21T09:42:06.184Z"}
{"line":"atscalaconcurrentforkjoinForkJoinPoolrunWorkerForkJoinPooljava1979snowplow+stream+collector+09009w==","errors":[{"l  evel":"error","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 2 bytes, but only   got 0 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)"}],"failure  _tstamp":"2017-07-21T09:42:06.184Z"}
{"line":"atscalaconcurrentforkjoinForkJoinWorkerThreadrunForkJoinWorkerThreadjava107snowplow+stream+collector+090090=","erro  rs":[{"level":"error","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 2 bytes,   but only got 1 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)"}],  "failure_tstamp":"2017-07-21T09:42:06.184Z"}

I have been stuck on this for two days now. Please help me get rid of this error.
