0

在调用 get_unique_df() 之前,compositekey_pipeline() 中的 NonKeySet 是有值的。

但是,在 get_unique_df() 内部,NonKeySet 却是空的(没有任何值)。

这个问题是与 Dagster 有关,还是与 Python 的其他方面有关?也许问题只是恰好在调用之后出现,而并非由 Dagster 引起。

# Dagster pipeline chaining read_data -> rearrange_n_prune_data ->
# create_pref_tree -> finding_non_keys -> get_unique_df.
# NOTE: the names bound in this body are not computed data. The output printed
# by this function shows NonKeySet as an InvokedSolidOutputHandle, i.e. a
# placeholder for the solid's output rather than the set itself.
@pipeline(
    mode_defs=[local_mode, prod_mode],
)
def compositekey_pipeline():
    df = read_data()
    #print("df.head() | read_data()", df.head()) # Empty
    df, possible_key_attr_no = rearrange_n_prune_data(df)
    pref_tree = create_pref_tree(df)
    NonKeySet = finding_non_keys(pref_tree, possible_key_attr_no)
    # Prints the repr of the output handle, not the contents of the set.
    print("NonKeySet | compositekey_pipeline()", NonKeySet)
    # get_unique_df declares a fourth input, outfile_name, which is not wired
    # here -- presumably it is supplied through run config; TODO confirm.
    get_unique_df(df, NonKeySet, possible_key_attr_no)

>>>
NonKeySet | compositekey_pipeline() <dagster.core.definitions.composition.InvokedSolidOutputHandle object at 0x7f2a7ce30f40>
NonKeySet | compositekey_pipeline() <dagster.core.definitions.composition.InvokedSolidOutputHandle object at 0x7f2a7ce66bb0>

注意:NonKeySet 被打印了两次,这表明该函数被调用了两次(不知道为什么,也不知道是如何发生的)。

@solid(
    input_defs=[
        InputDefinition('df', dagster_pd.DataFrame),
        InputDefinition('NonKeySet', Set),
        InputDefinition('possible_key_attr_no', List),
        InputDefinition('outfile_name', String),
    ]
)
def get_unique_df(context, df, NonKeySet, possible_key_attr_no, outfile_name):
    """Rank the keys derived from ``NonKeySet`` and write them to a CSV file.

    The key set is obtained with ``get_keyset(NonKeySet)``, converted to column
    names with ``set_index_to_col_names`` and passed, together with ``df``, to
    ``find_uniqueness_of_keys``. The resulting frame gets a ``key_length``
    column, is sorted by ``'Uniqueness %'`` (descending) and then
    ``key_length`` (ascending), and is saved as ``./data/<outfile_name>.csv``.

    Args:
        context: Dagster execution context (not used by this solid).
        df: Input DataFrame the keys are evaluated against.
        NonKeySet: Set handed to ``get_keyset``.
        possible_key_attr_no: Declared input; not used by this solid.
        outfile_name: Base name (without extension) of the CSV to write.

    Returns:
        None. The only results are the printed diagnostics and the CSV file.
    """
    print("df.head()", df.head())
    print("NonKeySet | get_unique_df()", NonKeySet)

    KeySet = get_keyset(NonKeySet)
    print("KeySet", KeySet)

    # Shortest names first; sorted() accepts any iterable, no list() needed.
    col_names = sorted(set_index_to_col_names(KeySet), key=len)
    print("col_names", col_names)

    unique_df = find_uniqueness_of_keys(df, col_names)
    print("unique_df.head()", unique_df.head())

    unique_df['key_length'] = unique_df['Keys'].str.len()
    # sort_values returns a new frame; keep it so the CSV is actually sorted
    # (previously the sorted result was computed and thrown away).
    unique_df = unique_df.sort_values(
        ['Uniqueness %', 'key_length'], ascending=[False, True]
    )

    out_path = Path('./data')
    # to_csv does not create missing directories.
    out_path.mkdir(parents=True, exist_ok=True)
    unique_df.to_csv(out_path.joinpath(f'{outfile_name}.csv'))

>>>
NonKeySet | get_unique_df() set()
Number of element in Non Key Set 0
KeySet set()
col_names []
unique_df.head() Empty DataFrame
4

0 回答 0