0

这是我的蜂巢桌

create table if not exists dumdum (val map<string,map<string,struct<student_id:string,age:int>>>);
insert into dumdum select map('A',map('1',named_struct('student_id','123a', 'age',11)));
insert into dumdum select map('B',map('2',named_struct('student_id','987z', 'age',11)));
select * from dumdum;

我明白了

{"A":{"1":{"student_id":"123a","age":11}}}
{"B":{"2":{"student_id":"987z","age":11}}}

我想student_id从内部地图中提取所有内容123a,即987z. 所以这就是我想做的

select some_udf(val) from dumdum;

结果应该是

["123a","987z"]

这是我写的Java UDF

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;

import org.apache.hadoop.hive.serde2.objectinspector.*;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;

import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class CustomUDF extends GenericUDF {
    private MapObjectInspector  inputMapOI                        = null;

    private Converter           inputMapKeyConverter              = null;

    private MapObjectInspector inputMapValueMapOI               = null;
    private Converter inputMapValueConverter;

    @Override
    public String getDisplayString(String[] arguments) {
        return "my udf";
    }

    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        if ((null == arguments) || (arguments.length != 1)) {
            throw new UDFArgumentLengthException("1 arguments are expected.");
        }

        if (!(arguments[0] instanceof MapObjectInspector)) {
            throw new UDFArgumentException("The first parameter should be a map object ");
        }

        inputMapOI = (MapObjectInspector) arguments[0];

        ObjectInspector mapKeyOI = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
        inputMapKeyConverter = ObjectInspectorConverters.getConverter(this.inputMapOI.getMapKeyObjectInspector(), mapKeyOI);

        if (!(inputMapOI.getMapValueObjectInspector() instanceof MapObjectInspector)) {
            throw new UDFArgumentException("The map value type must be a map ");
        }
        inputMapValueMapOI = (MapObjectInspector) this.inputMapOI.getMapValueObjectInspector();

        List<String> structFieldNames = new ArrayList<String>();

        structFieldNames.add("student_id");
        structFieldNames.add("age");

        List<ObjectInspector> structFieldObjectInspectors = new ArrayList<ObjectInspector>();
        structFieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        structFieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);

        ObjectInspector inputMapElementOI = inputMapValueMapOI.getMapValueObjectInspector();
        ObjectInspector outputMapElementOI = ObjectInspectorFactory.getStandardStructObjectInspector(structFieldNames, structFieldObjectInspectors);

        inputMapValueConverter = ObjectInspectorConverters.getConverter(inputMapElementOI, outputMapElementOI);

        return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    }

    @Override
    public Object evaluate(DeferredObject[] arguments) throws HiveException {
        if ((null == arguments) || (arguments.length != 1)) {
            throw new UDFArgumentLengthException("1 argument is expected.");
        }

        Map<?, ?> map = inputMapOI.getMap(arguments[0].get());
        List<String> dataList = new ArrayList<String>();
        for (Object key : map.keySet()) {
            Map<?, ?> valueMap = this.inputMapValueMapOI.getMap(map.get(key));
            if ((valueMap == null) || (valueMap.size() == 0)) {
                continue;
            }

            for (Object value : valueMap.keySet()) {
                try{
                    String innerkey = (String) this.inputMapKeyConverter.convert(value);
                    System.out.println("Got "+innerKey);
                    Map<?, ?> innerMap = (Map<?, ?>) this.inputMapValueMapOI.getMap(valueMap.get(key));
                    if ((innerMap == null) || (innerMap.size() == 0)) {
                        System.out.println("Got null");
                        continue;
                    }
                    for (Object struct : innerMap.keySet()) {
                    String strValue = (String) this.inputMapValueConverter.convert(struct);
                    
                    StructField str = (StructField) inputMapValueConverter.convert(innerMap.get(strValue));
                    /*
                    Not sure what to do here. Maybe 
                    str.getFieldID();
                    dataList.add(str.toString()); 
                    */
                    
                }
                }
                catch (ClassCastException c){
                    System.out.println("Got ClassCastException");
                }
            }
        }

        return dataList;
    }
}

当我调用它时

add jar /path/to/my/jar;
CREATE TEMPORARY FUNCTION modudf AS 'some.package.CustomUDF';
select modudf(val) from dumdum;

我永远不会过去

Map<?, ?> innerMap = (Map<?, ?>) this.inputMapValueMapOI.getMap(valueMap.get(inner));
                        if ((innerMap == null) || (innerMap.size() == 0)) {
                            System.out.println("Got null");
                            continue;
                        }

我可以看到的输出

System.out.println("Got "+innerKey);

在控制台上。

为什么我的转换器无法访问内部地图?

另外,一旦我能够访问内部地图,我将如何取消引用 StructField ?

更新

感谢 serge_k 的建议。恐怕我还需要一个转换器,否则我将无法获得密钥。这是我尝试过的

首先,我将第二个地图检查器和结构检查器定义为

inputMapValueMapOI = (MapObjectInspector) this.inputMapOI.getMapValueObjectInspector();

        List<String> structFieldNames = new ArrayList<String>();

        structFieldNames.add("student_id");
        structFieldNames.add("age");

        List<ObjectInspector> structFieldObjectInspectors = new ArrayList<ObjectInspector>();
        structFieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        structFieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);

        structOI = ObjectInspectorFactory.getStandardStructObjectInspector(structFieldNames, structFieldObjectInspectors);

那么这就是我尝试过的

                    String innerkey = (String) inputMapKeyConverter.convert(value);
                    System.out.println(innerKey);
                    Map<?, ?> innerMap = (Map<?, ?>) this.inputMapValueMapOI.getMap(valueMap.get(innerkey));
                    if ((innerMap == null) || (innerMap.size() == 0)) {
                        System.out.println("null inner map");
                        continue;
                    }
                    
                    for (Object struct : innerMap.keySet()) {
                        String ikey = (String) inputMapKeyConverter.convert(struct);
                        Object obj = structOI.getStructFieldData(innerMap.get(ikey), structOI.getStructFieldRef("student_id"));
                        dataList.add(obj.toString());
                    }

但我仍然看到

null inner map

我没有正确定义内部地图检查器吗?

4

1 回答 1

1

我建议您不要使用转换器,只需MapObjectInspector为内部地图定义第二个,获取外部地图值并getMap像第一个地图一样调用。要获取结构值,您需要定义StructObjectInspectorin 类型的变量initialize,例如

StructObjectInspector soi = ObjectInspectorFactory.getStandardStructObjectInspector(structFieldNames, structFieldObjectInspectors)

然后

Object obj = soi.getStructFieldData(innerMapValue, soi.getStructFieldRef("student_id"))

更新: 尝试将映射键转换为标准键,如下所示

private Map stdKeys(Map inspectMap) {
    Map objMap = new HashMap();
    for (Object inspKey : inspectMap.keySet()) {

        Object objKey = ((PrimitiveObjectInspector) mapInspector.getMapKeyObjectInspector()).getPrimitiveJavaObject(inspKey);
        objMap.put(objKey, inspKey);

    }
    return objMap;
}

有关更多详细信息,请参阅https://github.com/klout/brickhouse/blob/master/src/main/java/brickhouse/udf/collect/MapRemoveKeysUDF.java

于 2020-07-01T16:37:00.970 回答