1+ # coding: utf8
2+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
3+ #
4+ # Licensed under the Apache License, Version 2.0 (the "License");
5+ # you may not use this file except in compliance with the License.
6+ # You may obtain a copy of the License at
7+ #
8+ # http://www.apache.org/licenses/LICENSE-2.0
9+ #
10+ # Unless required by applicable law or agreed to in writing, software
11+ # distributed under the License is distributed on an "AS IS" BASIS,
12+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+ # See the License for the specific language governing permissions and
14+ # limitations under the License.
15+
116import glob
217import os .path
3- # import argparse
18+ import argparse
419import warnings
520import numpy as np
621
7-
8- # def parse_args():
9- # parser = argparse.ArgumentParser(
10- # description=
11- # 'A tool for proportionally randomizing dataset to produce file lists.')
12- # parser.add_argument('dataset_root', help='the dataset root path', type=str)
13- # parser.add_argument(
14- # 'images_dir_name', help='the directory name of images', type=str)
15- # parser.add_argument(
16- # 'labels_dir_name', help='the directory name of labels', type=str)
17- # parser.add_argument(
18- # '--split', help='', nargs=3, type=float, default=[0.7, 0.3, 0])
19- # parser.add_argument(
20- # '--label_class',
21- # help='label class names',
22- # type=str,
23- # nargs='*',
24- # default=['__background__', '__foreground__'])
25- # parser.add_argument(
26- # '--separator',
27- # dest='separator',
28- # help='file list separator',
29- # default=" ",
30- # type=str)
31- # parser.add_argument(
32- # '--format',
33- # help='data format of images and labels, e.g. jpg, tif or png.',
34- # type=str,
35- # nargs=2,
36- # default=['jpg', 'png'])
37- # parser.add_argument(
38- # '--postfix',
39- # help='postfix of images or labels',
40- # type=str,
41- # nargs=2,
42- # default=['', ''])
43-
44- # return parser.parse_args()
45-
46-
47- def get_files (dataset_root , format , postfix , image_folder_path ):
48- dataset_root = "/" .join (image_folder_path .split ("/" )[:- 2 ])
22+ # TODO: assign command line argument to variable
def parse_args(argv=None):
    """Parse the command-line options of the file-list generation tool.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the zero-argument call
            ``parse_args()`` has always done.

    Returns:
        argparse.Namespace carrying ``dataset_root``, ``images_dir_name``,
        ``labels_dir_name``, ``split``, ``label_class``, ``separator``,
        ``format`` and ``postfix``.
    """
    parser = argparse.ArgumentParser(
        description=
        'A tool for proportionally randomizing dataset to produce file lists.')
    parser.add_argument('dataset_root', help='the dataset root path', type=str)
    parser.add_argument(
        'images_dir_name', help='the directory name of images', type=str)
    parser.add_argument(
        'labels_dir_name', help='the directory name of labels', type=str)
    parser.add_argument(
        '--split',
        # The help text used to be empty; the caller rejects ratios whose
        # sum is not 1, so say so up front.
        help='three split ratios of the dataset; they must sum to 1',
        nargs=3,
        type=float,
        default=[0.7, 0.3, 0])
    parser.add_argument(
        '--label_class',
        help='label class names',
        type=str,
        nargs='*',
        default=['__background__', '__foreground__'])
    parser.add_argument(
        '--separator',
        dest='separator',
        help='file list separator',
        default=" ",
        type=str)
    parser.add_argument(
        '--format',
        help='data format of images and labels, e.g. jpg, tif or png.',
        type=str,
        nargs=2,
        default=['jpg', 'png'])
    parser.add_argument(
        '--postfix',
        help='postfix of images or labels',
        type=str,
        nargs=2,
        default=['', ''])

    return parser.parse_args(argv)
60+
61+
62+ def get_files (path , format , postfix ):
4963 pattern = '*%s.%s' % (postfix , format )
5064
51- search_files = os .path .join (dataset_root , pattern )
52- search_files2 = os .path .join (dataset_root , "*" , pattern ) # Include subdirectories
53- search_files3 = os .path .join (dataset_root , "*" , "*" , pattern ) # Contains three levels of directories
65+ search_files = os .path .join (path , pattern )
66+ search_files2 = os .path .join (path , "*" , pattern ) # 包含子目录
67+ search_files3 = os .path .join (path , "*" , "*" , pattern ) # 包含三级目录
5468
5569 filenames = glob .glob (search_files )
5670 filenames2 = glob .glob (search_files2 )
@@ -61,12 +75,11 @@ def get_files(dataset_root, format, postfix, image_folder_path):
6175 return sorted (filenames )
6276
6377
64- def pp_generate_list (args , image_folder_path , label_folder_path , Training_Set , Val_Set , Testing_Set ):
65- separator = "args.separator"
66- dataset_root = "/" .join (image_folder_path .split ("/" )[:- 2 ])
67- all_sum_sets = Training_Set + Val_Set + Testing_Set
68- if all_sum_sets != 1.0 :
69- raise ValueError ("The sum of the division ratios must be 1" )
78+ def generate_list (args ):
79+ separator = args .separator
80+ dataset_root = args .dataset_root
81+ if sum (args .split ) != 1.0 :
82+ raise ValueError ("划分比例之和必须为1" )
7083
7184 file_list = os .path .join (dataset_root , 'labels.txt' )
7285 with open (file_list , "w" ) as f :
@@ -131,3 +144,8 @@ def pp_generate_list(args, image_folder_path, label_folder_path, Training_Set, V
131144 f .write (line )
132145 print (line )
133146 start = end
147+
148+
if __name__ == "__main__":
    # Script entry point: read the command-line options, then hand them
    # to generate_list.
    args = parse_args()
    generate_list(args)
0 commit comments