在调用 `get_unique_df()` 之前,`compositekey_pipeline()` 中的 `NonKeySet` 是有值的。
但是,在 `get_unique_df()` 内部,`NonKeySet` 却没有任何值。
这个问题与 Dagster 有关,还是与 Python 的其他方面有关?也许问题只是碰巧出现在调用之后,而不是由 Dagster 引起的。
# Wires the solids together: read the data, rearrange/prune it, build the
# tree, derive the non-key set, then pass everything on to get_unique_df.
#
# NOTE: the names bound inside this body are the objects returned by invoking
# the solids during composition, not the values the solids compute at run
# time. The print below therefore shows an InvokedSolidOutputHandle rather
# than the contents of the set produced by finding_non_keys.
@pipeline(mode_defs=[local_mode, prod_mode])
def compositekey_pipeline():
    raw_df = read_data()
    # print("df.head() | read_data()", df.head())  # Empty
    pruned_df, attr_no = rearrange_n_prune_data(raw_df)
    tree = create_pref_tree(pruned_df)
    non_keys = finding_non_keys(tree, attr_no)
    print("NonKeySet | compositekey_pipeline()", non_keys)
    get_unique_df(pruned_df, non_keys, attr_no)
>>>
NonKeySet | compositekey_pipeline() <dagster.core.definitions.composition.InvokedSolidOutputHandle object at 0x7f2a7ce30f40>
NonKeySet | compositekey_pipeline() <dagster.core.definitions.composition.InvokedSolidOutputHandle object at 0x7f2a7ce66bb0>
注意:`NonKeySet` 被打印了两次,这表明该函数被调用了两次(不知道为什么、也不知道是如何发生的)。
@solid(
    input_defs=[
        InputDefinition('df', dagster_pd.DataFrame),
        InputDefinition('NonKeySet', Set),
        InputDefinition('possible_key_attr_no', List),
        InputDefinition('outfile_name', String),
    ]
)
def get_unique_df(context, df, NonKeySet, possible_key_attr_no, outfile_name):
    """Compute key uniqueness for ``df`` and write it to ``./data/<outfile_name>.csv``.

    The key set is derived from ``NonKeySet`` via ``get_keyset``, mapped to
    column names, and passed to ``find_uniqueness_of_keys``. The resulting
    frame gets a ``key_length`` column and is sorted by ``'Uniqueness %'``
    (descending) then ``key_length`` (ascending) before being written.

    Args:
        context: Solid execution context (not used in the body).
        df: Input data frame.
        NonKeySet: Set passed to ``get_keyset`` to obtain the key set.
        possible_key_attr_no: Currently unused; kept because it is a
            declared input of the solid.
        outfile_name: Stem of the CSV file created under ``./data``.
    """
    print("df.head()", df.head())
    print("NonKeySet | get_unique_df()", NonKeySet)

    # NOTE(review): the original called this helper twice and never used
    # either result. One call is kept only in case the helper has side
    # effects that set_index_to_col_names relies on -- confirm and remove.
    get_col_list_n_map(df)

    KeySet = get_keyset(NonKeySet)
    print("KeySet", KeySet)

    # Shortest names first.
    col_names = sorted(set_index_to_col_names(KeySet), key=len)
    print("col_names", col_names)

    unique_df = find_uniqueness_of_keys(df, col_names)
    print("unique_df.head()", unique_df.head())

    unique_df['key_length'] = unique_df['Keys'].str.len()
    # sort_values returns a new frame; the original discarded it, so the
    # CSV was written unsorted. Keep the sorted frame for the export.
    unique_df = unique_df.sort_values(
        ['Uniqueness %', 'key_length'], ascending=[False, True]
    )

    out_path = Path('./').joinpath('./data')
    # Create the output directory if needed so to_csv does not fail on a
    # fresh checkout.
    out_path.mkdir(parents=True, exist_ok=True)
    unique_df.to_csv(out_path.joinpath(f'{outfile_name}.csv'))
>>>
NonKeySet | get_unique_df() set()
Number of element in Non Key Set 0
KeySet set()
col_names []
unique_df.head() Empty DataFrame