You don't need Glue crawlers or MSCK REPAIR TABLE if you are loading partitions using Partition Projection. Just run the CREATE TABLE script once from the query editor and that should be it: Athena computes the partition values at query time instead of looking them up in the metastore. One caveat: because the partitions are never actually registered, you won't be able to see them in the Glue Data Catalog.
Alternatively, if you want to keep explicit partitions, the script below may help:
#Import libraries
import boto3
import datetime

#Clients for S3 and Athena
s3 = boto3.client('s3')
athena = boto3.client('athena')

#Get year, month, and day for the partition (tomorrow's date, handling
#month and year rollovers correctly)
date = datetime.datetime.now() + datetime.timedelta(days=1)
athena_year = date.strftime('%Y')
athena_month = date.strftime('%m')
athena_day = date.strftime('%d')

#Parameters for the S3 log location and the Athena table
#Fill these in carefully
s3_bucket = 'sqladmin-cloudtrail'
s3_prefix = 'AWSLogs/XXXXXXXXXXXX/CloudTrail/'
s3_input = 's3://' + s3_bucket + '/' + s3_prefix
s3_output = 's3://aws-athena-query-results-XXXXXXXXXXXXXX-us-east-1'
database = 'athena_log_database'
table_name = 'cloudtrail_logs_table'

#Execute an Athena query and return the response
def run_query(query, database, s3_output):
    query_response = athena.start_query_execution(
        QueryString=query,
        QueryExecutionContext={
            'Database': database
        },
        ResultConfiguration={
            'OutputLocation': s3_output,
        }
    )
    print('Execution ID: ' + query_response['QueryExecutionId'])
    return query_response

#Main handler: list the region prefixes under the CloudTrail path and
#add tomorrow's partition for each region
def lambda_handler(event, context):
    result = s3.list_objects_v2(Bucket=s3_bucket, Prefix=s3_prefix, Delimiter='/')
    for region in result.get('CommonPrefixes', []):
        get_region = region.get('Prefix', '').replace(s3_prefix, '').replace('/', '')
        query = ("ALTER TABLE " + table_name
                 + " ADD PARTITION (region='" + get_region + "'"
                 + ",year=" + athena_year
                 + ",month=" + athena_month
                 + ",day=" + athena_day + ")"
                 + " LOCATION '" + s3_input + get_region + "/"
                 + athena_year + "/" + athena_month + "/" + athena_day + "';")
        #print(get_region)  #for debugging
        #print(query)  #for debugging
        run_query(query, database, s3_output)
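Note that start_query_execution is asynchronous, so run_query returns before the partition is actually added. If you want the function to fail loudly when a query errors out, you could poll for the result with something like this (the helper name and sleep interval are my own choices, not part of the original script):

import time

def wait_for_query(execution_id):
    #Poll Athena until the query reaches a terminal state
    while True:
        status = athena.get_query_execution(QueryExecutionId=execution_id)
        state = status['QueryExecution']['Status']['State']
        if state in ('SUCCEEDED', 'FAILED', 'CANCELLED'):
            return state
        time.sleep(1)

#Usage inside lambda_handler:
#response = run_query(query, database, s3_output)
#if wait_for_query(response['QueryExecutionId']) != 'SUCCEEDED':
#    raise RuntimeError('Partition query failed')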
You can also run a Glue job on a daily schedule with a similar script to create the partitions. Just change the ALTER TABLE part accordingly and it should be good to go.
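For a scheduled job it's worth making the statement idempotent, so a rerun on the same day doesn't fail because the partition already exists. Athena accepts IF NOT EXISTS on ADD PARTITION, so the query could be built like this (same placeholder variables as in the script above):

query = ("ALTER TABLE " + table_name
         + " ADD IF NOT EXISTS PARTITION (region='" + get_region + "'"
         + ",year=" + athena_year
         + ",month=" + athena_month
         + ",day=" + athena_day + ")"
         + " LOCATION '" + s3_input + get_region + "/"
         + athena_year + "/" + athena_month + "/" + athena_day + "';")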