如果您需要去除 BigQuery 表中 STRING 列首尾的空格,可以使用 TRIM() 函数(它只删除开头和结尾的空白,不会删除字符串中间的空格)。
使用您的示例表:
-- Sample rows whose STRING values carry stray leading/trailing spaces,
-- followed by the same rows with every STRING column trimmed.
WITH table_A AS (
    SELECT " 1" AS number, " Male" AS Sex, 12 AS Age, "A " AS level UNION ALL
    SELECT " 2" AS number, " Male" AS Sex, 11 AS Age, "A- " AS level UNION ALL
    SELECT " 3" AS number, "Female " AS Sex, 9 AS Age, " A " AS level UNION ALL
    SELECT "4 " AS number, "Female" AS Sex, 13 AS Age, " A " AS level UNION ALL
    SELECT "5 " AS number, "Male " AS Sex, 10 AS Age, " B" AS level
)
-- Each TRIM() is aliased back to its source column; without an alias the
-- result columns get generated names (f0_, f1_, ...) instead of the originals.
SELECT
    TRIM(number) AS number,
    TRIM(Sex) AS Sex,
    Age,
    TRIM(level) AS level
FROM table_A
另一方面,根据这个StackOverflow 帖子,UDF 函数不能用于创建动态 SQL 语句。
尽管如此,您可以使用以下方法之一:
1-存储过程(以下代码是一个 BigQuery 脚本,可直接运行,也可封装为存储过程)
-- Trims leading/trailing whitespace, in place, from every top-level STRING
-- column of one table -- or of every base table -- in a dataset.
--
-- Inputs (edit the three SET statements below):
--   project_id   : project that owns the dataset
--   dataset_name : dataset to process
--   table_name   : the single table to process; set to '' to process every
--                  base table in the dataset
--
-- Identifiers cannot be bound as query parameters, so the statements are built
-- by string concatenation and run with EXECUTE IMMEDIATE. Only supply trusted
-- values for the three inputs.
DECLARE i INT64 DEFAULT 1;      -- 1-based position of the current row in temp_tables
DECLARE j INT64 DEFAULT 0;      -- number of tables to process
DECLARE z INT64 DEFAULT 0;      -- number of STRING columns in the current table
DECLARE update_query STRING;    -- SET clause: `col` = TRIM(`col`), ...
DECLARE string_query STRING;    -- dynamic statement currently being built
DECLARE project_id STRING;
DECLARE dataset_name STRING;
DECLARE table_name STRING;

SET project_id = '<project_id>';
SET dataset_name = '<dataset_name>';
SET table_name = '<table_name>';

-- Step 1: list the tables to process, numbered 1..j.
-- Only BASE TABLE rows are kept: views and external tables also appear in
-- INFORMATION_SCHEMA.TABLES but cannot be the target of an UPDATE.
SET string_query = 'CREATE OR REPLACE TEMP TABLE temp_tables AS '||
    'SELECT table_name, ROW_NUMBER() OVER (ORDER BY table_name) AS rownum '||
    'FROM `'||project_id||'`.`'||dataset_name||'`.INFORMATION_SCHEMA.TABLES '||
    'WHERE table_schema = "'||dataset_name||'" AND table_type = "BASE TABLE"';
IF table_name <> '' THEN
    SET string_query = string_query||' AND table_name = "'||table_name||'"';
END IF;

SELECT string_query;  -- echo the generated statement for inspection
EXECUTE IMMEDIATE string_query;

SET string_query = 'SELECT COUNT(*) FROM temp_tables';
EXECUTE IMMEDIATE string_query INTO j;

-- Step 2: for each table, build the SET clause from its STRING columns and
-- run a single UPDATE over all rows.
WHILE i <= j DO
    -- Qualify the column: the script variable has the same name.
    SET table_name = (SELECT t.table_name FROM temp_tables AS t WHERE t.rownum = i);

    -- One aggregate query returns both the STRING-column count and the
    -- comma-separated assignments. Column names are backtick-quoted so that
    -- reserved words (e.g. `order`, `group`) do not break the UPDATE.
    SET string_query = 'SELECT COUNT(*), '||
        'STRING_AGG(FORMAT("`%s` = TRIM(`%s`)", column_name, column_name), ", " ORDER BY column_name) '||
        'FROM `'||project_id||'`.`'||dataset_name||'`.INFORMATION_SCHEMA.COLUMNS '||
        'WHERE table_name = @current_table AND data_type = "STRING"';
    EXECUTE IMMEDIATE string_query INTO z, update_query USING table_name AS current_table;

    -- A table without STRING columns has nothing to trim; building the UPDATE
    -- anyway would produce an empty SET clause and abort the whole script.
    IF z > 0 THEN
        -- WHERE TRUE: the update intentionally touches every row.
        SET string_query = 'UPDATE `'||project_id||'.'||dataset_name||'.'||table_name||'` '||
            'SET '||update_query||' WHERE TRUE';
        EXECUTE IMMEDIATE string_query;
    END IF;

    SET i = i + 1;
END WHILE;

DROP TABLE temp_tables;
2-使用 BigQuery 客户端库动态创建查询语句:
查询表元数据获取列名和列类型
使用此信息动态创建修剪查询语句
例如使用Python 库:
from google.cloud import bigquery
# Shared BigQuery client; get_trim_query() and the __main__ block both issue their queries through it.
client = bigquery.Client()
def get_trim_query(project, dataset, table):
    """Build a SELECT statement that returns `table` with STRING columns trimmed.

    The table's columns are read from the dataset's INFORMATION_SCHEMA.COLUMNS
    view through the module-level ``client``. STRING columns are wrapped in
    TRIM() and aliased back to their own name; all other columns are selected
    unchanged.

    Args:
        project: Project ID that owns the dataset.
        dataset: Dataset name.
        table: Table name.

    Returns:
        The SQL text of the SELECT statement (it is not executed here).

    Raises:
        ValueError: If the metadata query returns no columns for the table.
    """
    # Identifiers cannot be bound as query parameters, so they are formatted
    # into the statement; project and dataset are backtick-quoted.
    metadata_sql = (
        "SELECT column_name, data_type\n"
        "FROM `{}`.`{}`.INFORMATION_SCHEMA.COLUMNS\n"
        "WHERE table_name = '{}'\n"
        # Keep the table's own column order in the generated SELECT.
        "ORDER BY ordinal_position"
    ).format(project, dataset, table)
    rows = client.query(metadata_sql).result()  # Waits for job to complete.

    select_list = []
    for row in rows:
        column = "`{}`".format(row.column_name)
        if row.data_type == "STRING":
            # Alias back to the column name so the result keeps the original
            # schema instead of a generated column name.
            select_list.append("TRIM({0}) AS {0}".format(column))
        else:
            select_list.append(column)

    if not select_list:
        # An empty select list would produce the invalid "SELECT  FROM ...".
        raise ValueError(
            "No columns found for table {}.{}.{}".format(project, dataset, table)
        )

    return "SELECT {} FROM `{}.{}.{}`".format(
        ", ".join(select_list), project, dataset, table
    )
if __name__ == "__main__":
    # Table that will receive the trimmed copy of the origin table.
    destination_table_id = "<PROJECT>.<DATASET>.<DESTINATION_TABLE>"

    # Generate the trimming SELECT from the origin table's metadata and show it.
    trim_sql = get_trim_query("<PROJECT>", "<DATASET>", "<ORIGIN_TABLE>")
    print(trim_sql)

    # Run the SELECT with its results written to the destination table.
    load_job = client.query(
        trim_sql,
        job_config=bigquery.QueryJobConfig(destination=destination_table_id),
    )  # Make an API request.
    load_job.result()  # Wait for the job to complete.

    print("Query results loaded to the table {}".format(destination_table_id))