CREATE TABLE `cluster_diagnostic_report`(
`run_id` string COMMENT 'format: <hostname>_<datetime> - to uniquely identify the a particular execution instance of Cluster Diag job',
`execution_hostname` string COMMENT 'Machine Name from where Test Case Executed',
`module` string COMMENT 'Test Case Module',
`expected_result` string COMMENT 'Test Case Module expected Result',
`actual_result` string COMMENT 'Test Case Module actual Result',
`validation_result` string COMMENT 'Test Case Module validation Result',
`start_time` string COMMENT 'Test Case Module Start Time',
`end_time` string COMMENT 'Test Case Module Elapsed Time',
`elapsed_time` string COMMENT 'from deserializer',
`total_time_seconds` int COMMENT 'total elapsed time for this step')
PARTITIONED BY (
`cluster_name` string,
`rptg_dt` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
从上面的内容中,我只需要获取分区列的名称和类型。对于上面的示例,我想获取如下详细信息:
col_name = cluster_name, type = string
rptg_dt= cluster_name, type = string
我尝试过的内容如下所示,它返回无:
partitionResult = re.match(r"PARTITIONED\s\w+\s\((\n){2}",line)
if partitionResult == None:
pass
else:
print(partitionResult.group(1),sep='\t')
谁能建议该怎么做?