有人可以帮我理解一下这个函数的作用吗?
我能看懂那几行 print 语句,但从 train_data 那一行开始,后面的部分我就有点看不明白了。
def stratifiedShuffleSplit_data(X, y):
    """Split the data with StratifiedShuffleSplit and save train/test rows.

    Builds a ``StratifiedShuffleSplit`` with 5 splits, a test size of 0.5
    and ``random_state=0``. For every split it prints the sizes and the
    contents of the two index arrays, gathers the matching rows from ``df``
    and passes them to ``save_datarows``.

    ``df``, ``datafile``, ``save_datarows`` and ``StratifiedShuffleSplit``
    are not defined in this function; they must already exist in the
    enclosing module scope.

    Args:
        X: Samples, forwarded unchanged to ``sss.split``.
        y: Targets, forwarded unchanged to ``sss.split``; presumably the
            class labels the stratification is based on.

    Returns:
        None. Output is produced by the ``print`` calls and by
        ``save_datarows``, which is called with ``datafile + ".train"``
        and ``datafile + ".test"``.
    """
    sss = StratifiedShuffleSplit(n_splits=5, test_size=0.5, random_state=0)

    # NOTE(review): the original text had lost all indentation. Everything
    # after the ``for`` line is treated as the loop body here -- confirm
    # against the intended version.
    for train_index, test_index in sss.split(X, y):
        print("len(TRAIN):", len(train_index), "len(TEST):", len(test_index))
        print("TRAIN:", train_index, "TEST:", test_index)

        # NOTE(review): the split indices are presumably positional row
        # numbers, while ``df.loc`` looks rows up by index label. The two
        # agree only if ``df`` has a default 0..n-1 index and its rows line
        # up with ``X`` -- verify, or consider ``df.iloc``.
        train_data = [df.loc[ind] for ind in train_index]
        test_data = [df.loc[ind] for ind in test_index]

        # NOTE(review): every split is saved under the same two paths. If
        # ``save_datarows`` overwrites its target, only the last of the 5
        # splits remains on disk -- TODO confirm this is intended.
        save_datarows(train_data, datafile + ".train")
        save_datarows(test_data, datafile + ".test")