我想解析这个 xml 文件:-
<?xml version="1.0" encoding="UTF-8"?>
<ArtistData>
<ArtistPerson>
<ArtistId>ARXJJSN1187B98CB37</ArtistId>
<ArtistName>Rajan</ArtistName>
<Age>23</Age>
</ArtistPerson>
<ArtistPerson>
<ArtistId>ARNUFGE1187B9B7881</ArtistId>
<ArtistName>Lily</ArtistName>
<Age>25</Age>
</ArtistPerson>
<ArtistPerson>
<ArtistId>AR7K9W71187B9AF065</ArtistId>
<ArtistName>John</ArtistName>
<Age>30</Age>
</ArtistPerson>
<ArtistPerson>
<ArtistId>ARHYUI71187FB48366</ArtistId>
<ArtistName>Marie</ArtistName>
<Age>34</Age>
</ArtistPerson>
<ArtistPerson>
<ArtistId>ARZEAO01187B998042</ArtistId>
<ArtistName>Clarie</ArtistName>
<Age>26</Age>
</ArtistPerson>
<ArtistPerson>
<ArtistId>AR8L6W21187B9AD317</ArtistId>
<ArtistName>Shobhit</ArtistName>
<Age>35</Age>
</ArtistPerson>
<ArtistPerson>
<ArtistId>ARTG85X1187B99D1F5</ArtistId>
<ArtistName>Adnan</ArtistName>
<Age>39</Age>
</ArtistPerson>
<ArtistPerson>
<ArtistId>ART9VTZ1187FB48DDC</ArtistId>
<ArtistName>Shaan</ArtistName>
<Age>42</Age>
</ArtistPerson>
<ArtistPerson>
<ArtistId>AROSQW61187FB5330B</ArtistId>
<ArtistName>Javed</ArtistName>
<Age>38</Age>
</ArtistPerson>
</ArtistData>
</ArtistData>
这就是我创建表并将数据加载到其中的方式:-
Create table artist_data_xml (artist_id string , artists_name string, age int)
row format SERDE 'com.ibm.spss.hive.serde2.xml.XmlSerDe'
WITH SERDEPROPERTIES (
"column.xpath.artist_id"="/ArtistPerson/ArtistId/text()",
"column.xpath.artist_name"="/ArtistPerson/ArtistName/text()",
"column.xpath.age"="/ArtistPerson/Age/text()")
STORED AS INPUTFORMAT 'com.ibm.spss.hive.serde2.xml.XmlInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
TBLPROPERTIES (
"xmlinput.start"="<ArtistPerson>",
"xmlinput.end"="</ArtistPerson>"
);
LOAD DATA local inpath '/home/goyal_rahul31/test_data/artist_data.xml' into table artist_data_xml;
当我试图通过表格进行选择时,出现以下错误:
蜂巢>从艺术家数据xml中选择*;失败并出现异常 java.io.IOException:org.apache.hadoop.hive.ql.metadata.HiveException: 评估艺术家姓名时出错