0

我在我的脚本中遇到了一个问题,老实说,我不知道去哪里寻找和/或如何解决它。

我有以下脚本。

# --- Source / target storage configuration ---
connection_string = ''  # Connection string for the source storage account
account_key = ''        # Account key for the source storage account

# Table service clients: `table_service_out` reads tables from the source
# account, `table_service_in` writes them into the target account.
table_service_out = TableService(account_name='', account_key='')
table_service_in = TableService(account_name='', account_key='')

# Blob service client for the source account.
# NOTE(review): the original created this client twice with the same
# connection string; one instance is sufficient.
client = BlobServiceClient.from_connection_string(connection_string)
all_containers = client.list_containers(include_metadata=True)
for container in all_containers:
    # Skip system containers such as "$web" (static website) and "$logs":
    # "$" is not a valid character in a regular container name, so trying
    # to create "<name><today>" in the target account fails with
    # "The specified resource name contains invalid characters".
    if container['name'].startswith('$'):
        print("SKIP system container: " + container['name'])
        continue

    # Account-level SAS token granting read/list access so the target
    # account can pull blobs from the source blob URLs.
    sas_token = generate_account_sas(
        account_name=client.account_name,
        account_key=account_key,
        resource_types=ResourceTypes(object=True, container=True),
        permission=AccountSasPermissions(read=True, list=True),
        expiry=datetime.utcnow() + timedelta(hours=24),  # token valid for 24 hours
    )

    print("==========================")
    print(container['name'], container['metadata'])

    # Everything below is loop-invariant for all blobs in this container,
    # so set up the target client and destination container once (the
    # original rebuilt them for every single blob).
    target_connection_string = ''  # Connection string for the target storage account
    target_destination_blob = container['name'] + today  # destination container name
    target_client = BlobServiceClient.from_connection_string(target_connection_string)
    target_container_client = target_client.get_container_client(target_destination_blob)
    if not target_container_client.exists():
        target_container_client.create_container()

    container_client = client.get_container_client(container['name'])
    for blob in container_client.list_blobs():
        # Blob client for the source blob, authenticated with the SAS token.
        source_blob = BlobClient(
            client.url,
            container_name=container['name'],
            blob_name=blob.name,
            credential=sas_token,
        )
        target_blob_name = blob.name
        print(target_blob_name)
        # Server-side copy: the target account pulls from the SAS-signed URL.
        new_blob = target_client.get_blob_client(target_destination_blob, target_blob_name)
        new_blob.start_copy_from_url(source_blob.url)
        print("COPY TO: " + target_connection_string)
        print(f"TRY: saving blob {target_blob_name} into {target_destination_blob} ")
        

# Page size for table queries: fetch 1000 entities per request instead of
# loading the whole table at once, to limit memory consumption.
query_size = 1000

def queryAndSaveAllDataBySize(source_table_name, target_table_name,resp_data:ListGenerator ,table_out:TableService,table_in:TableService,query_size:int):
    """Copy one page of entities into the target table, then recurse on the
    continuation marker until the source table is exhausted.

    Args:
        source_table_name: Name of the table being read in the source account.
        target_table_name: Name of the table being written in the target account.
        resp_data: Current page of entities (exposes ``next_marker``).
        table_out: Table service for the source account (reads).
        table_in: Table service for the target account (writes).
        query_size: Page size for the follow-up queries.
    """
    for item in resp_data:
        # Remove the etag and Timestamp appended by the table service;
        # they are not user properties and must not be re-inserted.
        del item.etag
        del item.Timestamp
        # Log the table actually written to. (The original recomputed a
        # local name per item and printed that instead, which could
        # disagree with target_table_name.)
        print("INSERT data:" + str(item) + "into TABLE:" + target_table_name)
        table_in.insert_or_replace_entity(target_table_name, item)
    # A continuation marker means the source table has more pages; fetch
    # the next page and recurse.
    if resp_data.next_marker:
        data = table_out.query_entities(table_name=source_table_name, num_results=query_size, marker=resp_data.next_marker)
        queryAndSaveAllDataBySize(source_table_name, target_table_name, data, table_out, table_in, query_size)


# Enumerate every table in the source account and copy each one into the
# target account under the name "<table><today>".
tbs_out = table_service_out.list_tables()
print(tbs_out)

for src_table in tbs_out:
    dest_table = src_table.name + today
    # Ensure the destination table exists (no-op when it already does).
    table_service_in.create_table(table_name=dest_table, fail_on_exist=False)

    # Fetch the first page; the helper follows continuation markers from there.
    first_page = table_service_out.query_entities(src_table.name, num_results=query_size)
    queryAndSaveAllDataBySize(src_table.name, dest_table, first_page,
                              table_service_out, table_service_in, query_size)

这个脚本运行得很好:它会把源存储中的所有 blob 和表复制到目标存储。但我遇到的问题是:当源存储帐户中存在 $web 容器(静态网站容器)时,脚本会抛出 "The Resource contains an invalid character" 错误。这很奇怪,因为我检查过该容器及其中的文件,它们的名称里都只有允许的字符。

我是否遗漏了有关此配置的某些内容或做错了什么?

非常感谢您提供的任何帮助,如果您需要更多信息,请告诉我

4

0 回答 0