1

我有 1000 张表,需要对每张表逐一运行 describe <table name>; 进行检查。您能否给我一个命令,一次获取 N 张表的描述,而不必逐个运行?

4

2 回答 2

1

您可以编写一个 shell 脚本并带参数调用它。例如,以下脚本接收模式(schema)名称作为参数,生成该模式下的表列表,对每张表调用 DESCRIBE EXTENDED 命令并提取其位置(location),最后打印该模式中按名称排序的前 1000 张表的表位置。您可以对其进行修改,并将其当作单个命令使用:

#!/bin/bash
#
# Print the storage location of the tables in a Hive schema.
#
# Usage: <this script> <hive_schema>
#
# The table names of the schema are written to the file tables_<hive_schema>
# in the current directory; then DESCRIBE EXTENDED is run for each of the
# first $tableNum_limit tables (sorted by name) and the "location:" field is
# extracted from its output.

#Create table list for a schema (script parameter)
HIVE_SCHEMA=$1
if [ -z "$HIVE_SCHEMA" ]; then
    # Without a schema the generated HiveQL would be "use ;", so stop early.
    echo "Usage: $0 <hive_schema>" >&2
    exit 1
fi
echo "Processing Hive schema $HIVE_SCHEMA..."
tablelist="tables_$HIVE_SCHEMA"

if ! hive -e " set hive.cli.print.header=false; use $HIVE_SCHEMA; show tables;" 1> "$tablelist"; then
    echo "Could not list tables of schema $HIVE_SCHEMA" >&2
    exit 1
fi

#number of tables
tableNum_limit=1000

#For each table do:
# Reading line by line avoids glob expansion of the table list; `read`
# without a custom IFS still trims surrounding whitespace from each name.
sort "$tablelist" | head -n "$tableNum_limit" | while read -r table #add proper sorting
do
    # Skip blank lines in the table list.
    [ -n "$table" ] || continue

    echo "Processing table $table ..."

    #Call DESCRIBE
    # stdin is redirected so hive cannot consume the table list that feeds the loop.
    out=$(hive -S -e "use $HIVE_SCHEMA; DESCRIBE EXTENDED $table" < /dev/null)

    #Get location for example
    # grep -E replaces the obsolescent egrep; -o keeps only the matched "location:..." text.
    table_location=$(echo "${out}" | grep -Eo 'location:[^,]+' | sed 's/location://')
    echo "Table location: $table_location"
    #Do something else here

done
于 2017-05-05T12:11:42.787 回答
1

查询元存储

演示

Hive

-- Demo fixtures: three databases, each holding one table with a different
-- mix of primitive and complex column types.
CREATE DATABASE my_db_1;
CREATE DATABASE my_db_2;
CREATE DATABASE my_db_3;

-- Single primitive column.
CREATE TABLE my_db_1.my_tbl_1 (
    i int
);

-- Several primitive columns, including a parameterized decimal.
CREATE TABLE my_db_2.my_tbl_2 (
    c1 string,
    c2 date,
    c3 decimal(12,2)
);

-- Complex (array and struct) columns.
CREATE TABLE my_db_3.my_tbl_3 (
    x array<int>,
    y struct<i:int,j:int,k:int>
);

MySQL(元存储)

-- List every column of every table in the databases whose name starts with
-- "my_db_", one row per column, by joining the metastore tables
-- DBS -> TBLS -> SDS -> COLUMNS_V2.
USE metastore;

SELECT
    db.name             AS db_name,
    tbl.tbl_name,
    col.integer_idx + 1 AS col_position,  -- shift the stored index by one so positions start at 1
    col.column_name,
    col.type_name
FROM DBS AS db
INNER JOIN TBLS AS tbl
    ON tbl.db_id = db.db_id
INNER JOIN SDS AS sd
    ON sd.sd_id = tbl.sd_id
INNER JOIN COLUMNS_V2 AS col
    ON col.cd_id = sd.cd_id
-- The underscores are escaped so they match literally instead of acting as
-- single-character LIKE wildcards.
WHERE db.name LIKE 'my\_db\_%'
ORDER BY
    db.name,
    tbl.tbl_name,
    col.integer_idx;

+---------+----------+--------------+-------------+---------------------------+
| db_name | tbl_name | col_position | column_name |         type_name         |
+---------+----------+--------------+-------------+---------------------------+
| my_db_1 | my_tbl_1 |            1 | i           | int                       |
| my_db_2 | my_tbl_2 |            1 | c1          | string                    |
| my_db_2 | my_tbl_2 |            2 | c2          | date                      |
| my_db_2 | my_tbl_2 |            3 | c3          | decimal(12,2)             |
| my_db_3 | my_tbl_3 |            1 | x           | array<int>                |
| my_db_3 | my_tbl_3 |            2 | y           | struct<i:int,j:int,k:int> |
+---------+----------+--------------+-------------+---------------------------+
于 2017-05-05T16:03:31.970 回答