将语义分割数据集的原始数据按 7:3 的比例划分为训练集和测试集

下载公开数据集时,所有图片往往放在同一个目录下;做实验时,仍然需要按 7:3 的比例将数据集划分为训练集和测试集。这里我准备了一个脚本,大家只需要设置划分之后的保存路径、原始图像路径和原始标签路径,即可将数据随机划分为 7:3 的训练集和测试集。脚本如下:

import os
import random
import shutil

# Dataset paths: set these three values for your own machine before running.
dataset_path = 'E:/EEEE-COM/toUser/toUser/train/split_data' # root folder the split dataset is written to
images_path = 'E:/EEEE-COM/toUser/toUser/train/cut_data/training_images' # folder holding the original images
labels_path = 'E:/EEEE-COM/toUser/toUser/train/cut_data/training_labels' # folder holding the original labels

# Collect the samples to split. os.listdir() also returns sub-directories,
# which shutil.copy() cannot handle, so only regular files are kept.
images_name = [
    name
    for name in os.listdir(images_path)
    if os.path.isfile(os.path.join(images_path, name))
]
images_num = len(images_name)
# Number of samples assigned to the training split (70 %, rounded down).
alpha = int(images_num * 0.7)
print(images_num)

# One in-place shuffle is enough to randomise the order; the first `alpha`
# names become the training split and the remainder the test split.
random.shuffle(images_name)
train_list = images_name[:alpha]
valid_list = images_name[alpha:]

# Sanity check: the two sizes should add up to the total printed above.
print('train list: ', len(train_list))
print('valid list: ', len(valid_list))

# Output layout created under dataset_path:
#   Training_Images / Training_Labels -> training split
#   Test_Images     / Test_Labels     -> test split
train_images_path = os.path.join(dataset_path, 'Training_Images')
train_labels_path = os.path.join(dataset_path, 'Training_Labels')
valid_images_path = os.path.join(dataset_path, 'Test_Images')
valid_labels_path = os.path.join(dataset_path, 'Test_Labels')

# os.makedirs(..., exist_ok=True) creates missing parent folders as well
# (os.mkdir fails when dataset_path itself does not exist yet) and is a
# no-op for folders that are already present, so the script can be re-run.
for out_dir in (
    train_images_path,
    train_labels_path,
    valid_images_path,
    valid_labels_path,
):
    os.makedirs(out_dir, exist_ok=True)

# Copy each image and its label into the folder pair of the split it belongs
# to. The training split is processed first, then the test split.
split_plan = (
    (train_list, train_images_path, train_labels_path),
    (valid_list, valid_images_path, valid_labels_path),
)
for file_names, images_dst, labels_dst in split_plan:
    for file_name in file_names:
        image_src = os.path.join(images_path, file_name)
        # The label is looked up under exactly the same file name as the image.
        # NOTE: if the labels use a different extension (e.g. .png labels for
        # .jpg images), the label file name has to be adapted here.
        label_src = os.path.join(labels_path, file_name)
        shutil.copy(image_src, os.path.join(images_dst, file_name))
        shutil.copy(label_src, os.path.join(labels_dst, file_name))

12-25 06:35