我需要类似 @Dipendra Pant 的思路,但他的代码在我这里无法运行,我认为它的 for 循环中存在一些缩进错误。无论如何,下面是主要基于他的回答、对我有效的解决方案:它从一个包含 5 个子文件夹(我的 5 个类别)的文件夹中读取数据,并将所有内容保存到 3 个文件夹(train_ds、test_ds、val_ds)中,每个文件夹内各有 5 个子文件夹,之后可以直接配合 image_dataset_from_directory 并设置 shuffle=False 使用(打乱操作已经在这段代码中完成)。
import os
import numpy as np
import shutil
import random
# Source dataset: one sub-folder per class; the pictures are read straight
# from these folders.
root_dir = base_folder+"input/House_Room_Dataset-5_rooms/"
# Keep only real sub-directories: a stray file sitting next to the class
# folders would otherwise be treated as a class and break the per-class
# listing later on. Sorted so classes are processed in a reproducible order.
classes_dir = sorted(
    entry for entry in os.listdir(root_dir)
    if os.path.isdir(os.path.join(root_dir, entry))
)
# Fractions of each class that go to the training and validation subsets;
# whatever is left (1 - train_ratio - val_ratio) becomes the test subset.
train_ratio = 0.6
val_ratio = 0.1
# Split every class folder into train/test/val subsets and copy the files to
# input_destination + {'train_ds/', 'test_ds/', 'val_ds/'} + <class>.
# NOTE: existing destination folders are reused (exist_ok=True) and the
# shuffle is not seeded, so running this twice can place the same image in
# two different subsets -- start from an empty destination.
for cls in classes_dir:
    src = os.path.join(root_dir, cls)
    if not os.path.isdir(src):
        # Stray file next to the class folders: not a class, nothing to split.
        continue

    # Destination paths are built by plain concatenation, so
    # input_destination is expected to end with a path separator.
    train_dest = input_destination + 'train_ds/' + cls
    test_dest = input_destination + 'test_ds/' + cls
    val_dest = input_destination + 'val_ds/' + cls
    for dest in (train_dest, test_dest, val_dest):
        os.makedirs(dest, exist_ok=True)

    # for each class, let's count its elements (regular files only:
    # shutil.copy cannot copy a nested directory)
    allFileNames = [
        name for name in os.listdir(src)
        if os.path.isfile(os.path.join(src, name))
    ]

    # shuffle it and split into train/test/val
    np.random.shuffle(allFileNames)
    train_end = int(train_ratio * len(allFileNames))
    test_end = int((1 - val_ratio) * len(allFileNames))

    # save their initial path
    train_FileNames = [os.path.join(src, name) for name in allFileNames[:train_end]]
    test_FileNames = [os.path.join(src, name) for name in allFileNames[train_end:test_end]]
    val_FileNames = [os.path.join(src, name) for name in allFileNames[test_end:]]

    print("\n *****************************",
          "\n Total images: ",cls, len(allFileNames),
          '\n Training: ', len(train_FileNames),
          '\n Testing: ', len(test_FileNames),
          '\n Validation: ', len(val_FileNames),
          '\n *****************************')

    # copy files from the initial path to the final folders
    for dest, subset in ((train_dest, train_FileNames),
                         (test_dest, test_FileNames),
                         (val_dest, val_FileNames)):
        for name in subset:
            shutil.copy(name, dest)
# Sanity check: walk each output folder and print, for every directory found
# in it, the directory path, the subset it belongs to and its file count.
paths = ['train_ds/', 'test_ds/','val_ds/']
for p in paths:
    # `current_dir` instead of `dir` so the builtin dir() is not shadowed;
    # the list of sub-directories yielded by os.walk is not needed here.
    for current_dir, _subdirs, files in os.walk(input_destination + p):
        print(current_dir,' ', p, str(len(files)))