I am reading data from a shared Glue table (via Lake Formation) in a Jupyter notebook, using the awswrangler library. I am able to read a test table from a sample database. Note that the local_db database mentioned below was created locally in the same AWS account I am running this query from. The lf_shared_table table is a resource link to the shared table.
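For reference, a resource link like lf_shared_table can be created through the Glue API along these lines (a sketch only; the catalog ID and the target database/table names are placeholders, not my actual values):

import boto3

glue = boto3.client("glue")

# A resource link is a table whose TargetTable points at the database/table
# shared from the source account through Lake Formation.
glue.create_table(
    DatabaseName="local_db",
    TableInput={
        "Name": "lf_shared_table",
        "TargetTable": {
            "CatalogId": "111122223333",   # placeholder: source account ID
            "DatabaseName": "shared_db",   # placeholder: shared database
            "Name": "shared_table",        # placeholder: shared table
        },
    },
)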
The query that fails:

import awswrangler as wr

# Read the resource link in the local database through Athena.
df = wr.athena.read_sql_query(sql="SELECT * FROM lf_shared_table", database="local_db")
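In case the defaults matter, the fully spelled-out equivalent is roughly the following (the region, staging bucket, and explicit session are placeholders for my environment; ctas_approach=False reflects the non-CTAS code path, _resolve_query_without_cache_regular, that the traceback below goes through):

import awswrangler as wr
import boto3

# Placeholder session/region; the actual run used the notebook's default
# credentials in the same account that owns local_db.
session = boto3.Session(region_name="us-east-1")

df = wr.athena.read_sql_query(
    sql="SELECT * FROM lf_shared_table",
    database="local_db",
    ctas_approach=False,                  # non-CTAS path, as in the traceback
    s3_output="s3://my-athena-staging/",  # placeholder Athena results bucket
    boto3_session=session,
)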
Error:
~/anaconda3/envs/python3/lib/python3.6/site-packages/awswrangler/_config.py in wrapper(*args_raw, **kwargs)
437 del args[name]
438 args = {**args, **keywords}
--> 439 return function(**args)
440
441 wrapper.__doc__ = _inject_config_doc(doc=function.__doc__, available_configs=available_configs)
~/anaconda3/envs/python3/lib/python3.6/site-packages/awswrangler/athena/_read.py in read_sql_query(sql, database, ctas_approach, categories, chunksize, s3_output, workgroup, encryption, kms_key, keep_files, ctas_database_name, ctas_temp_table_name, ctas_bucketing_info, use_threads, boto3_session, max_cache_seconds, max_cache_query_inspections, max_remote_cache_entries, max_local_cache_entries, data_source, params, s3_additional_kwargs, pyarrow_additional_kwargs)
878 s3_additional_kwargs=s3_additional_kwargs,
879 boto3_session=session,
--> 880 pyarrow_additional_kwargs=pyarrow_additional_kwargs,
881 )
882
~/anaconda3/envs/python3/lib/python3.6/site-packages/awswrangler/athena/_read.py in _resolve_query_without_cache(sql, database, data_source, ctas_approach, categories, chunksize, s3_output, workgroup, encryption, kms_key, keep_files, ctas_database_name, ctas_temp_table_name, ctas_bucketing_info, use_threads, s3_additional_kwargs, boto3_session, pyarrow_additional_kwargs)
593 use_threads=use_threads,
594 s3_additional_kwargs=s3_additional_kwargs,
--> 595 boto3_session=boto3_session,
596 )
597
~/anaconda3/envs/python3/lib/python3.6/site-packages/awswrangler/athena/_read.py in _resolve_query_without_cache_regular(sql, database, data_source, s3_output, keep_files, chunksize, categories, encryption, workgroup, kms_key, wg_config, use_threads, s3_additional_kwargs, boto3_session)
508 boto3_session=boto3_session,
509 categories=categories,
--> 510 metadata_cache_manager=_cache_manager,
511 )
512 return _fetch_csv_result(
~/anaconda3/envs/python3/lib/python3.6/site-packages/awswrangler/athena/_utils.py in _get_query_metadata(query_execution_id, boto3_session, categories, query_execution_payload, metadata_cache_manager)
259 _query_execution_payload: Dict[str, Any] = query_execution_payload
260 else:
--> 261 _query_execution_payload = wait_query(query_execution_id=query_execution_id, boto3_session=boto3_session)
262 cols_types: Dict[str, str] = get_query_columns_types(
263 query_execution_id=query_execution_id, boto3_session=boto3_session
~/anaconda3/envs/python3/lib/python3.6/site-packages/awswrangler/athena/_utils.py in wait_query(query_execution_id, boto3_session)
795 _logger.debug("StateChangeReason: %s", response["Status"].get("StateChangeReason"))
796 if state == "FAILED":
--> 797 raise exceptions.QueryFailed(response["Status"].get("StateChangeReason"))
798 if state == "CANCELLED":
799 raise exceptions.QueryCancelled(response["Status"].get("StateChangeReason"))
QueryFailed: HIVE_METASTORE_ERROR: Required Table Storage Descriptor is not populated. (Service: null; Status Code: 0; Error Code: null; Request ID: null; Proxy: null)
Any suggestions would be helpful.