我对 HiveQL 很陌生,我有点卡住了:S
我有一个如下模式的表:它有一个名为 res 的列,并按名为 field 的分区列划分为三个分区。
-- Result table: a single string column `res`, partitioned by the string
-- column `field`.
CREATE TABLE results (
    res STRING
)
PARTITIONED BY (field STRING);
然后我在这个表中导入数据
-- Fill one partition of `results` per source (titles, artist, albums).
-- explode() turns each element of `line` into its own output row, and
-- INSERT OVERWRITE replaces whatever the target partition held before.
INSERT OVERWRITE TABLE results PARTITION (field = 'title')
SELECT explode(line) AS res_value
FROM titles;

INSERT OVERWRITE TABLE results PARTITION (field = 'artist')
SELECT explode(line) AS res_value
FROM artist;

INSERT OVERWRITE TABLE results PARTITION (field = 'albums')
SELECT explode(line) AS res_value
FROM albums;
我正在尝试统计跨三个分区的唯一元组的出现次数。
例如,下面这条命令统计数据集中每个标题的出现次数。
-- Number of rows per distinct value in the 'title' partition,
-- sorted by that count in ascending order.
SELECT
    res,
    COUNT(*) AS counttotal
FROM results
WHERE field = 'title'
GROUP BY res
ORDER BY counttotal;
它输出类似
title count
Hit me Baby More time 9
如何将其扩展到元组(标题、专辑、艺术家)?如果我想有这样的输出:
title album artist count
Baby one more time hit me baby one more time britney spears 9
我的整个代码:
-- External table over the files stored under the given HDFS location.
-- Rows are parsed as tab-delimited text and only one column, `xmldata`,
-- is declared.
CREATE EXTERNAL TABLE IF NOT EXISTS hivetesttable (
    xmldata STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
LOCATION '/user/sdasd/hivetestdata/';
-- Expose the raw XML column of hivetesttable under the column name `line`.
-- The column is named explicitly instead of using SELECT * so the view keeps
-- its one-column shape and its intent stays visible.
CREATE VIEW xmlout (line) AS
SELECT xmldata
FROM hivetesttable;
-- TITLES: for each XML document, the values of the `ttl` attribute on every
-- child element of /MC/SC, as returned by xpath().
CREATE VIEW TITLES (line) AS
SELECT xpath(line, '/MC/SC/*/@ttl')
FROM xmlout;

-- ARTIST: same extraction for the `art` attribute.
CREATE VIEW ARTIST (line) AS
SELECT xpath(line, '/MC/SC/*/@art')
FROM xmlout;
-- ALBUMS: for each XML document, the values of the `alb` attribute on every
-- child element of /MC/SC. The path previously read `@art`, which made this
-- view return artist names instead of album names.
CREATE VIEW ALBUMS (line) AS
SELECT xpath(line, '/MC/SC/*/@alb')
FROM xmlout;
-- Result table: a single string column `res`, partitioned by the string
-- column `field`.
CREATE TABLE results (
    res STRING
)
PARTITIONED BY (field STRING);
-- Fill one partition of `results` per source view (titles, artist, albums).
-- explode() turns each element of `line` into its own output row, and
-- INSERT OVERWRITE replaces whatever the target partition held before.
INSERT OVERWRITE TABLE results PARTITION (field = 'title')
SELECT explode(line) AS res_value
FROM titles;

INSERT OVERWRITE TABLE results PARTITION (field = 'artist')
SELECT explode(line) AS res_value
FROM artist;

INSERT OVERWRITE TABLE results PARTITION (field = 'albums')
SELECT explode(line) AS res_value
FROM albums;
-- Number of rows per distinct value in the 'title' partition,
-- sorted by that count in ascending order.
SELECT
    res,
    COUNT(*) AS counttotal
FROM results
WHERE field = 'title'
GROUP BY res
ORDER BY counttotal;
一行xml数据就像
<?xml version="1.0" encoding="UTF-8"?><MC><SC><S uid="2" gen="" yr="2011" art="Samsung" cmp="&lt;unknown&gt;" fld="/mnt/sdcard/Samsung/Music" alb="Samsung" ttl="Over the horizon"/><S uid="37" gen="" yr="2010" art="Jason Derulo" cmp="&lt;unknown&gt;" fld="/mnt/sdcard/Music/Jason Derulo/Jason Derulo" alb="Jason Derulo" ttl="Whatcha Say"/><S uid="38" gen="" yr="2010" art="Jason Derulo" cmp="&lt;unknown&gt;" fld="/mnt/sdcard/Music/Jason Derulo/Jason Derulo" alb="Jason Derulo" ttl="In My Head"/><S uid="39" gen="" yr="2011" art="Alexandra Stan" cmp="&lt;unknown&gt;" fld="/mnt/sdcard/Music/Alexandra Stan/Mr_ Saxobeat - Single" alb="Mr. Saxobeat - Single" ttl="Mr. Saxobeat (Extended Version)"/><S uid="40" gen="" yr="2011" art="Bushido" cmp="&lt;unknown&gt;" fld="/mnt/sdcard/Music/Bushido/Jenseits von Gut und Böse (Premium Edition)" alb="Jenseits von Gut und Böse (Premium Edition)" ttl="Wie ein Löwe"/><S uid="41" gen="" yr="2011" art="Bushido" cmp="&lt;unknown&gt;" fld="/mnt/sdcard/Music/Bushido/Jenseits von Gut und Böse (Premium Edition)" alb="Jenseits von Gut und Böse (Premium Edition)" ttl="Verreckt"/><S uid="42" gen="" yr="2011" art="Lucenzo" cmp="&lt;unknown&gt;" fld="/mnt/sdcard/Music/Lucenzo/Danza Kuduro (feat_ Don Omar) [From _Fast &amp; Furious 5_] - Single" alb="Danza Kuduro (feat. Don Omar) [From &quot;Fast &amp; Furious 5&quot;] - Single" ttl="Danza Kuduro (feat. Don Omar) [From &quot;Fast &amp; Furious 5&quot;]"/><S uid="121" gen="" yr="701" art="Michael Jackson" cmp="&lt;unknown&gt;" fld="/mnt/sdcard/external_sd/Music/Michael Jackson/Bad [Bonus Tracks]" alb="Bad [Bonus Tracks]" ttl="Voice-Over Intro/Quincy Jones Interview #1 [*]"/></SC><PC/></MC>