@@ -46,6 +46,11 @@ class Cifar10(tfds.core.GeneratorBasedBuilder):
46
46
"""CIFAR-10."""
47
47
48
48
VERSION = tfds .core .Version ("3.0.1" )
49
+ SUPPORTED_VERSIONS = [
50
+ tfds .core .Version (
51
+ "3.0.2" , experiments = {tfds .core .Experiment .METADATA : True }
52
+ ),
53
+ ]
49
54
50
55
def _info (self ):
51
56
return tfds .core .DatasetInfo (
@@ -54,6 +59,7 @@ def _info(self):
54
59
"images in 10 classes, with 6000 images per class. There "
55
60
"are 50000 training images and 10000 test images." ),
56
61
features = tfds .features .FeaturesDict ({
62
+ "id" : tfds .features .Text (),
57
63
"image" : tfds .features .Image (shape = _CIFAR_IMAGE_SHAPE ),
58
64
"label" : tfds .features .ClassLabel (num_classes = 10 ),
59
65
}),
@@ -100,19 +106,26 @@ def gen_filenames(filenames):
100
106
return [
101
107
tfds .core .SplitGenerator (
102
108
name = tfds .Split .TRAIN ,
103
- gen_kwargs = {"filepaths" : gen_filenames (cifar_info .train_files )}),
109
+ gen_kwargs = {
110
+ "split_prefix" : "train_" ,
111
+ "filepaths" : gen_filenames (cifar_info .train_files )
112
+ }),
104
113
tfds .core .SplitGenerator (
105
114
name = tfds .Split .TEST ,
106
- gen_kwargs = {"filepaths" : gen_filenames (cifar_info .test_files )}),
115
+ gen_kwargs = {
116
+ "split_prefix" : "test_" ,
117
+ "filepaths" : gen_filenames (cifar_info .test_files )
118
+ }),
107
119
]
108
120
109
- def _generate_examples (self , filepaths ):
121
+ def _generate_examples (self , split_prefix , filepaths ):
110
122
"""Generate CIFAR examples as dicts.
111
123
112
124
Shared across CIFAR-{10, 100}. Uses self._cifar_info as
113
125
configuration.
114
126
115
127
Args:
128
+ split_prefix (str): Prefix that identifies the split (e.g. "train_" or "test_").
116
129
filepaths (list[str]): The files to use to generate the data.
117
130
118
131
Yields:
@@ -123,6 +136,10 @@ def _generate_examples(self, filepaths):
123
136
for path in filepaths :
124
137
for labels , np_image in _load_data (path , len (label_keys )):
125
138
record = dict (zip (label_keys , labels ))
139
+ # Note: "id" is only provided for user convenience. To shuffle the
140
+ # dataset we use `index`, so that the sharding is compatible with
141
+ # earlier versions.
142
+ record ["id" ] = "{}{:05d}" .format (split_prefix , index )
126
143
record ["image" ] = np_image
127
144
yield index , record
128
145
index += 1
@@ -132,6 +149,11 @@ class Cifar100(Cifar10):
132
149
"""CIFAR-100 dataset."""
133
150
134
151
VERSION = tfds .core .Version ("3.0.1" )
152
+ SUPPORTED_VERSIONS = [
153
+ tfds .core .Version (
154
+ "3.0.2" , experiments = {tfds .core .Experiment .METADATA : True }
155
+ ),
156
+ ]
135
157
136
158
@property
137
159
def _cifar_info (self ):
@@ -156,6 +178,7 @@ def _info(self):
156
178
"(the class to which it belongs) and a \" coarse\" label "
157
179
"(the superclass to which it belongs)." ),
158
180
features = tfds .features .FeaturesDict ({
181
+ "id" : tfds .features .Text (),
159
182
"image" : tfds .features .Image (shape = _CIFAR_IMAGE_SHAPE ),
160
183
"label" : tfds .features .ClassLabel (num_classes = 100 ),
161
184
"coarse_label" : tfds .features .ClassLabel (num_classes = 20 ),
0 commit comments