我想编写一个通过 PigServer 连接 Cassandra 的 Java 程序,而不是使用 grunt。但程序运行时无法找到我为 Pig 设置的环境变量。非常感谢任何形式的帮助;如果有比 Pig 和 Java MapReduce 更好的方案,也欢迎指出。
以下是我设置的环境变量:
# Environment for running Pig 0.11.1 against a local Cassandra node.
export PATH=/Users/rachana/software/pig-0.11.1/bin:$PATH
export PIG_HOME=/Users/rachana/software/pig-0.11.1
export PIG_CONF_DIR=/Users/rachana/software/hadoop-1.1.2/conf
# Connection settings looked up by CassandraStorage (contact host, Thrift RPC port, partitioner).
export PIG_INITIAL_ADDRESS=localhost
export PIG_RPC_PORT=9160
export PIG_PARTITIONER=org.apache.cassandra.dht.Murmur3Partitioner
# Keep any pre-existing JVM options as separate, space-delimited arguments.
# Appending $PIG_OPTS after a ':' would splice those options into the
# colon-separated udf.import.list package list instead.
export PIG_OPTS="${PIG_OPTS:+$PIG_OPTS }-Dudf.import.list=org.apache.cassandra.hadoop.pig"
代码如下:
package com.chegg.hwh.tracking.dao;
import java.util.Properties;
import org.apache.cassandra.hadoop.pig.CassandraStorage;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.impl.PigContext;
/**
 * Runs a small Pig script against Cassandra through an embedded {@link PigServer}
 * (instead of the grunt shell) and prints the schema of the grouped relation.
 */
public class HWHDataPigMapReduce {

    /**
     * Loads the {@code users} column family of the {@code hwh_tracking} keyspace,
     * groups the rows by {@code email}, and dumps the schema of that grouping.
     *
     * @param args command-line arguments; not used
     * @throws Exception if Pig cannot parse or register the queries, or if the
     *         Cassandra load function cannot provide a schema
     */
    public static void main(String[] args) throws Exception {
        // NOTE(review): these entries are Pig properties, not process environment
        // variables. CassandraStorage's error message asks for PIG_INITIAL_ADDRESS as an
        // *environment variable*, so presumably these keys do not satisfy that lookup --
        // confirm against the Cassandra version in use, and export the PIG_* variables in
        // the environment that launches this JVM (shell or IDE run configuration).
        Properties properties = new Properties();
        properties.put("PIG_HOME", "/Users/rachana/software/pig-0.11.1");
        properties.put("PIG_CONF_DIR", "/Users/rachana/software/hadoop-1.1.2/conf");
        properties.put("PIG_INITIAL_ADDRESS", "localhost");
        properties.put("PIG_RPC_PORT", "9160");
        properties.put("PIG_PARTITIONER", "org.apache.cassandra.dht.Murmur3Partitioner");

        PigContext pigContext = new PigContext(ExecType.LOCAL, properties);
        PigServer pigServer = new PigServer(pigContext);

        // The LOAD result must be bound to the alias "rows": the GROUP statement below
        // refers to it by that name and would otherwise fail on an undefined alias.
        pigServer.registerQuery("rows = LOAD 'cassandra://hwh_tracking/users' USING org.apache.cassandra.hadoop.pig.CassandraStorage();");
        pigServer.registerQuery("emailgroup = group rows by email;");
        pigServer.dumpSchema("emailgroup");
    }
}
错误信息如下:
13/07/05 16:56:19 INFO executionengine.HExecutionEngine: Connecting to hadoop file system at: file:///
2013-07-05 16:56:19.117 java[3413:1c03] Unable to load realm mapping info from SCDynamicStore
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/Users/rachana/astyanax_lib/slf4j-log4j12-1.6.4.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/Users/rachana/astyanax_lib/pig-0.11.1.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
Exception in thread "main" org.apache.pig.impl.logicalLayer.FrontendException: ERROR 1000: Error during parsing. Cannot get schema from loadFunc org.apache.cassandra.hadoop.pig.CassandraStorage
at org.apache.pig.PigServer$Graph.parseQuery(PigServer.java:1607)
at org.apache.pig.PigServer$Graph.registerQuery(PigServer.java:1546)
at org.apache.pig.PigServer.registerQuery(PigServer.java:516)
at org.apache.pig.PigServer.registerQuery(PigServer.java:529)
at com.chegg.hwh.tracking.dao.HWHDataPigMapReduce.main(HWHDataPigMapReduce.java:21)
Caused by: Failed to parse: Can not retrieve schema from loader org.apache.cassandra.hadoop.pig.CassandraStorage@beeb7e9
at org.apache.pig.parser.QueryParserDriver.parse(QueryParserDriver.java:193)
at org.apache.pig.PigServer$Graph.parseQuery(PigServer.java:1599)
... 4 more
Caused by: java.lang.RuntimeException: Can not retrieve schema from loader org.apache.cassandra.hadoop.pig.CassandraStorage@beeb7e9
at org.apache.pig.newplan.logical.relational.LOLoad.<init>(LOLoad.java:90)
at org.apache.pig.parser.LogicalPlanBuilder.buildLoadOp(LogicalPlanBuilder.java:839)
at org.apache.pig.parser.LogicalPlanGenerator.load_clause(LogicalPlanGenerator.java:3236)
at org.apache.pig.parser.LogicalPlanGenerator.op_clause(LogicalPlanGenerator.java:1315)
at org.apache.pig.parser.LogicalPlanGenerator.general_statement(LogicalPlanGenerator.java:799)
at org.apache.pig.parser.LogicalPlanGenerator.statement(LogicalPlanGenerator.java:517)
at org.apache.pig.parser.LogicalPlanGenerator.query(LogicalPlanGenerator.java:392)
at org.apache.pig.parser.QueryParserDriver.parse(QueryParserDriver.java:184)
... 5 more
Caused by: org.apache.pig.impl.logicalLayer.FrontendException: ERROR 2245: Cannot get schema from loadFunc org.apache.cassandra.hadoop.pig.CassandraStorage
at org.apache.pig.newplan.logical.relational.LOLoad.getSchemaFromMetaData(LOLoad.java:178)
at org.apache.pig.newplan.logical.relational.LOLoad.<init>(LOLoad.java:88)
... 12 more
Caused by: java.io.IOException: PIG_INPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set
at org.apache.cassandra.hadoop.pig.CassandraStorage.setLocation(CassandraStorage.java:404)
at org.apache.cassandra.hadoop.pig.CassandraStorage.getSchema(CassandraStorage.java:414)
at org.apache.pig.newplan.logical.relational.LOLoad.getSchemaFromMetaData(LOLoad.java:174)
... 13 more