用 Python 制作训练集和测试集的图片名列表文本

 # -*- coding: utf-8 -*-

 from pathlib import Path  #从pathlib中导入Path

 import os

 import fileinput

 import random

 # Root directory of the image dataset; gen_txt() walks two levels of
 # sub-directories below it.
 root_path = '/home/tay/Videos/trash/垃圾分类项目/total/'

 # Output list files. The encoding is explicit because the dataset paths
 # contain non-ASCII characters and the locale default is not guaranteed to
 # be UTF-8.
 # NOTE(review): append mode keeps earlier content, so re-running the script
 # adds a second, different random split to the same files -- remove the old
 # files first if a clean split is wanted.
 train = open('./trash_train.txt', 'a', encoding='utf-8')

 test = open('./trash_test.txt', 'a', encoding='utf-8')

 # Current working directory with a trailing slash (not referenced by the
 # visible code).
 pwd = os.getcwd() + '/'

 def gen_txt(root=None, train_file=None, test_file=None, rate=0.2, seed=None):
     """Write train/test image lists for a two-level directory tree.

     The tree is walked as ``root/<group>/<class>/<entry>``. Each ``<class>``
     directory receives the next integer label (starting at 1). A random
     ``rate`` fraction of its entries is written to the test list and the
     remaining entries to the train list, one ``<path> <label>`` line each.
     The written path is ``<group>/<class>/<entry>`` prefixed with whatever
     follows the last ``'total/'`` in ``root`` (empty when ``root`` ends with
     ``'total/'``).

     Directory listings are sorted so that labels are reproducible, and
     entries that are not directories at the group/class levels are skipped
     instead of raising ``NotADirectoryError``.

     Args:
         root: Dataset root directory; defaults to the module-level
             ``root_path``.
         train_file: Writable text stream for the train list; defaults to
             the module-level ``train``.
         test_file: Writable text stream for the test list; defaults to the
             module-level ``test``.
         rate: Fraction of each class directory sampled for the test list
             (the count is truncated towards zero).
         seed: Optional seed for a private random generator. When ``None``
             the shared ``random`` module state is used.
     """
     # Defaults are resolved lazily so plain ``gen_txt()`` keeps using the
     # module-level configuration.
     root = root_path if root is None else root
     train_out = train if train_file is None else train_file
     test_out = test if test_file is None else test_file
     rng = random if seed is None else random.Random(seed)
     prefix = root.split('total/')[-1]

     label = 0
     # os.listdir returns entries in arbitrary order; sorting keeps the
     # class -> label mapping stable between runs and machines.
     for group in sorted(os.listdir(root)):
         group_dir = os.path.join(root, group)
         if not os.path.isdir(group_dir):
             continue  # stray file next to the group folders
         print('file is{}'.format(str(group)))

         for class_name in sorted(os.listdir(group_dir)):  # sub-folders
             class_dir = os.path.join(group_dir, class_name)
             if not os.path.isdir(class_dir):
                 continue  # stray file next to the class folders
             print('init is{}'.format(str(class_name)))
             label += 1

             names = sorted(os.listdir(class_dir))
             print('pathDir is', names)

             # Randomly pick ``rate`` of the entries for the test list.
             sample = rng.sample(names, int(len(names) * rate))
             print('sample is', sample)

             # Set membership avoids the quadratic list scans.
             picked = set(sample)
             same = [name for name in names if name in picked]
             diff = [name for name in names if name not in picked]
             print('different', diff)
             print('same', same)

             test_out.writelines(
                 '{}{}/{}/{} {}\n'.format(prefix, group, class_name, name, label)
                 for name in sample
             )
             train_out.writelines(
                 '{}{}/{}/{} {}\n'.format(prefix, group, class_name, name, label)
                 for name in diff
             )

     # The default streams are module-level handles that are never closed
     # explicitly, so push buffered lines out before returning.
     train_out.flush()
     test_out.flush()

 # Generate the train/test list files when the script runs.
 gen_txt()

采用 random.sample 函数从每个类别文件夹中随机选取 20% 的文件名作为测试集，再从全部文件名中去掉已被选中的部分，剩下的即为训练集的文件名。

总体上就是在进行字符串操作。