我目前可以提供 2 个选项,但没有一个是完美的。
顺便说一句,RegexSerDe 的 "output.format.string" 属性已经过时,设置它不会有任何效果。
选项 1:把每个值的引号单独捕获到 q1、q2、q3 列中,字段值本身不带引号
-- Variant that captures the optional opening double quote of each value in
-- its own column (q1, q2, q3) and the value without its quotes in
-- field1, field2, field3. The columns are listed in the same order as the
-- capture groups of input.regex. Each \k<qN> backreference requires the
-- closing quote to match whatever the opening quote group captured.
CREATE EXTERNAL TABLE mytable (
    q1     STRING,
    field1 STRING,
    q2     STRING,
    field2 STRING,
    q3     STRING,
    field3 STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    'input.regex' = '.*?=(?<q1>"?)(.*?)(?:\\k<q1>)\\|.*?=(?<q2>"?)(.*?)(?:\\k<q2>)\\|.*?=(?<q3>"?)(.*?)(?:\\k<q3>)'
)
STORED AS TEXTFILE;

-- Show every column so the quote columns and the value columns can be compared.
SELECT *
FROM mytable;
+----+--------+----+--------+----+-----------+
| q1 | field1 | q2 | field2 | q3 |  field3   |
+----+--------+----+--------+----+-----------+
|    | value2 |    | value2 | "  | va , lues |
+----+--------+----+--------+----+-----------+
选项 2:只保留三个字段列,带引号的值会连同引号一起保留
-- Variant with only the three value columns. Each capture group of
-- input.regex matches either a double-quoted value, quotes included,
-- or an unquoted value, so a quoted value keeps its surrounding quotes.
CREATE EXTERNAL TABLE mytable (
    field1 STRING,
    field2 STRING,
    field3 STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    'input.regex' = '.*?=(".*?"|.*?)\\|.*?=(".*?"|.*?)\\|.*?=(".*?"|.*?)'
)
STORED AS TEXTFILE;

-- Show every column of the parsed rows.
SELECT *
FROM mytable;
+--------+--------+-------------+
| field1 | field2 |   field3    |
+--------+--------+-------------+
| value2 | value2 | "va , lues" |
+--------+--------+-------------+